2018-06-06 02:42:14 +00:00
// SPDX-License-Identifier: GPL-2.0
2009-12-14 23:14:59 +00:00
/*
 * Copyright (c) 2009, Christoph Hellwig
 * All Rights Reserved.
 *
 * NOTE: none of these tracepoints shall be considered a stable kernel ABI
 * as they can change at any time.
 *
 * Current conventions for printing numbers measuring specific units:
 *
 * agno: allocation group number
 *
 * agino: per-AG inode number
 * ino: filesystem inode number
 *
 * agbno: per-AG block number in fs blocks
 * startblock: physical block number for file mappings.  This is either a
 *             segmented fsblock for data device mappings, or a rfsblock
 *             for realtime device mappings
 * fsbcount: number of blocks in an extent, in fs blocks
 *
 * daddr: physical block number in 512b blocks
 * bbcount: number of blocks in a physical extent, in 512b blocks
 *
 * rtx: physical rt extent number for extent mappings
 * rtxcount: number of rt extents in an extent mapping
 *
 * owner: reverse-mapping owner, usually inodes
 *
 * fileoff: file offset, in fs blocks
 * pos: file offset, in bytes
 * bytecount: number of bytes
 *
 * dablk: directory or xattr block offset, in filesystem blocks
 *
 * disize: ondisk file size, in bytes
 * isize: incore file size, in bytes
 *
 * forkoff: inode fork offset, in bytes
 *
 * ireccount: number of inode records
 *
 * Numbers describing space allocations (blocks, extents, inodes) should be
 * formatted in hexadecimal.
 */
# undef TRACE_SYSTEM
# define TRACE_SYSTEM xfs
# if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
# define _TRACE_XFS_H
# include <linux/tracepoint.h>
/*
 * Forward declarations (incomplete types) for use by the tracepoint
 * definitions in this header.
 */
struct xfs_agf;
struct xfs_alloc_arg;
struct xfs_attr_list_context;
struct xfs_buf_log_item;
struct xfs_da_args;
struct xfs_da_node_entry;
struct xfs_dquot;
struct xfs_log_item;
struct xlog;
struct xlog_ticket;
struct xlog_recover;
struct xlog_recover_item;
struct xlog_rec_header;
struct xlog_in_core;
struct xfs_buf_log_format;
struct xfs_inode_log_format;
struct xfs_bmbt_irec;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
/*
 * Further forward declarations (incomplete types) for use by the tracepoint
 * definitions in this header.
 */
struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_refcount_irec;
struct xfs_fsmap;
struct xfs_fsmap_irec;
struct xfs_group;
struct xfs_rmap_irec;
struct xfs_icreate_log;
struct xfs_iunlink_item;
struct xfs_owner_info;
struct xfs_trans_res;
struct xfs_inobt_rec_incore;
union xfs_btree_ptr;
struct xfs_dqtrx;
struct xfs_icwalk;
struct xfs_perag;
struct xfbtree;
struct xfs_btree_ops;
struct xfs_bmap_intent;
struct xfs_exchmaps_intent;
struct xfs_exchmaps_req;
struct xfs_exchrange;
struct xfs_getparents;
struct xfs_parent_irec;
struct xfs_attrlist_cursor_kern;
struct xfs_extent_free_item;
struct xfs_rmap_intent;
struct xfs_refcount_intent;
struct xfs_metadir_update;
struct xfs_rtgroup;
2009-12-14 23:14:59 +00:00
2020-02-27 01:30:42 +00:00
/* Flag/name pairs used to decode an attr_filter value with __print_flags(). */
#define XFS_ATTR_FILTER_FLAGS \
	{ XFS_ATTR_ROOT,	" ROOT " }, \
	{ XFS_ATTR_SECURE,	" SECURE " }, \
	{ XFS_ATTR_INCOMPLETE,	" INCOMPLETE " }, \
	{ XFS_ATTR_PARENT,	" PARENT " }
2020-02-27 01:30:42 +00:00
2009-12-21 14:03:03 +00:00
/*
 * Event class for attribute-list tracepoints.
 *
 * @ctx: attribute list context being traced.
 *
 * Records the device and inode of ctx->dp, the cursor position
 * (hashval/blkno/offset), the output buffer pointer and size, the count,
 * firstu and dupcnt values, and the attr_filter flags.
 */
DECLARE_EVENT_CLASS(xfs_attr_list_class,
	TP_PROTO(struct xfs_attr_list_context *ctx),
	TP_ARGS(ctx),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u32, hashval)
		__field(u32, blkno)
		__field(u32, offset)
		__field(void *, buffer)
		__field(int, bufsize)
		__field(int, count)
		__field(int, firstu)
		__field(int, dupcnt)
		__field(unsigned int, attr_filter)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
		__entry->ino = ctx->dp->i_ino;
		__entry->hashval = ctx->cursor.hashval;
		__entry->blkno = ctx->cursor.blkno;
		__entry->offset = ctx->cursor.offset;
		__entry->buffer = ctx->buffer;
		__entry->bufsize = ctx->bufsize;
		__entry->count = ctx->count;
		__entry->firstu = ctx->firstu;
		/*
		 * dupcnt is declared above and printed below; without this
		 * assignment the event would report an unassigned value.
		 * NOTE(review): assumes struct xfs_attr_list_context has a
		 * dupcnt member -- confirm against its definition.
		 */
		__entry->dupcnt = ctx->dupcnt;
		__entry->attr_filter = ctx->attr_filter;
	),
	TP_printk(" dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
		  " buffer %p size %u count %u firstu %u filter %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->hashval,
		  __entry->blkno,
		  __entry->offset,
		  __entry->dupcnt,
		  __entry->buffer,
		  __entry->bufsize,
		  __entry->count,
		  __entry->firstu,
		  __print_flags(__entry->attr_filter, " | ",
				XFS_ATTR_FILTER_FLAGS)
	)
)
2009-12-14 23:14:59 +00:00
# define DEFINE_ATTR_LIST_EVENT(name) \
2009-12-21 14:03:03 +00:00
DEFINE_EVENT ( xfs_attr_list_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_attr_list_context * ctx ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( ctx ) )
2009-12-14 23:14:59 +00:00
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_sf ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_sf_all ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_leaf ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_leaf_end ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_full ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_add ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_wrong_blk ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_list_notfound ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_LIST_EVENT ( xfs_attr_leaf_list ) ;
DEFINE_ATTR_LIST_EVENT ( xfs_attr_node_list ) ;
2009-12-14 23:14:59 +00:00
2020-11-30 00:33:39 +00:00
TRACE_EVENT ( xlog_intent_recovery_failed ,
2023-12-28 07:24:10 +00:00
TP_PROTO ( struct xfs_mount * mp , const struct xfs_defer_op_type * ops ,
int error ) ,
TP_ARGS ( mp , ops , error ) ,
2020-11-30 00:33:39 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2023-12-28 07:24:10 +00:00
__string ( name , ops - > name )
2020-11-30 00:33:39 +00:00
__field ( int , error )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
2020-11-30 00:33:39 +00:00
__entry - > error = error ;
) ,
2023-12-28 07:24:10 +00:00
TP_printk ( " dev %d:%d optype %s error %d " ,
2020-11-30 00:33:39 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2023-12-28 07:24:10 +00:00
__get_str ( name ) ,
__entry - > error )
2020-11-30 00:33:39 +00:00
) ;
2010-05-24 08:25:57 +00:00
DECLARE_EVENT_CLASS ( xfs_perag_class ,
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , unsigned long caller_ip ) ,
2023-02-12 22:14:52 +00:00
TP_ARGS ( pag , caller_ip ) ,
2010-05-24 08:25:57 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , refcount )
2023-02-12 22:14:52 +00:00
__field ( int , active_refcount )
2010-05-24 08:25:57 +00:00
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
__entry - > refcount = atomic_read ( & pag - > pag_group . xg_ref ) ;
__entry - > active_refcount =
atomic_read ( & pag - > pag_group . xg_active_ref ) ;
2010-05-24 08:25:57 +00:00
__entry - > caller_ip = caller_ip ;
) ,
2023-02-12 22:14:52 +00:00
TP_printk ( " dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS " ,
2010-05-24 08:25:57 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > refcount ,
2023-02-12 22:14:52 +00:00
__entry - > active_refcount ,
2010-05-24 08:25:57 +00:00
( char * ) __entry - > caller_ip )
) ;
# define DEFINE_PERAG_REF_EVENT(name) \
DEFINE_EVENT ( xfs_perag_class , name , \
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , unsigned long caller_ip ) , \
2023-02-12 22:14:52 +00:00
TP_ARGS ( pag , caller_ip ) )
2021-05-31 18:32:02 +00:00
DEFINE_PERAG_REF_EVENT ( xfs_perag_set_inode_tag ) ;
DEFINE_PERAG_REF_EVENT ( xfs_perag_clear_inode_tag ) ;
2024-08-29 04:08:41 +00:00
DEFINE_PERAG_REF_EVENT ( xfs_reclaim_inodes_count ) ;
2010-05-24 08:25:57 +00:00
2024-11-04 04:18:38 +00:00
TRACE_DEFINE_ENUM ( XG_TYPE_AG ) ;
2024-11-04 04:19:04 +00:00
TRACE_DEFINE_ENUM ( XG_TYPE_RTG ) ;
2024-11-04 04:18:38 +00:00
DECLARE_EVENT_CLASS ( xfs_group_class ,
TP_PROTO ( struct xfs_group * xg , unsigned long caller_ip ) ,
TP_ARGS ( xg , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( enum xfs_group_type , type )
__field ( xfs_agnumber_t , agno )
__field ( int , refcount )
__field ( int , active_refcount )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = xg - > xg_mount - > m_super - > s_dev ;
__entry - > type = xg - > xg_type ;
__entry - > agno = xg - > xg_gno ;
__entry - > refcount = atomic_read ( & xg - > xg_ref ) ;
__entry - > active_refcount = atomic_read ( & xg - > xg_active_ref ) ;
__entry - > caller_ip = caller_ip ;
) ,
TP_printk ( " dev %d:%d %sno 0x%x passive refs %d active refs %d caller %pS " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
__entry - > agno ,
__entry - > refcount ,
__entry - > active_refcount ,
( char * ) __entry - > caller_ip )
) ;
/* Instantiate a tracepoint called @name from xfs_group_class. */
#define DEFINE_GROUP_REF_EVENT(name) \
DEFINE_EVENT(xfs_group_class, name, \
	TP_PROTO(struct xfs_group *xg, unsigned long caller_ip), \
	TP_ARGS(xg, caller_ip))

DEFINE_GROUP_REF_EVENT(xfs_group_get);
DEFINE_GROUP_REF_EVENT(xfs_group_hold);
DEFINE_GROUP_REF_EVENT(xfs_group_put);
DEFINE_GROUP_REF_EVENT(xfs_group_grab);
DEFINE_GROUP_REF_EVENT(xfs_group_grab_next_tag);
DEFINE_GROUP_REF_EVENT(xfs_group_rele);
2021-08-06 18:05:43 +00:00
/* Tracepoint recording the device and the shrinker_hits value passed in. */
TRACE_EVENT(xfs_inodegc_worker,
	TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
	TP_ARGS(mp, shrinker_hits),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, shrinker_hits)
	),
	TP_fast_assign(
		__entry->shrinker_hits = shrinker_hits;
		__entry->dev = mp->m_super->s_dev;
	),
	TP_printk(" dev %d:%d shrinker_hits %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->shrinker_hits)
);
2021-08-06 18:05:39 +00:00
/*
 * Event class for filesystem-wide tracepoints.
 *
 * @mp:        mount being traced; may be NULL.
 * @caller_ip: address of the caller, printed symbolically.
 *
 * Records the device, m_features, m_opstate and the superblock s_flags of
 * @mp.  When @mp is NULL those fields are recorded as zero.
 */
DECLARE_EVENT_CLASS(xfs_fs_class,
	TP_PROTO(struct xfs_mount *mp, void *caller_ip),
	TP_ARGS(mp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long long, mflags)
		__field(unsigned long, opstate)
		__field(unsigned long, sbflags)
		__field(void *, caller_ip)
	),
	TP_fast_assign(
		if (mp) {
			__entry->dev = mp->m_super->s_dev;
			__entry->mflags = mp->m_features;
			__entry->opstate = mp->m_opstate;
			__entry->sbflags = mp->m_super->s_flags;
		} else {
			/*
			 * TP_printk() prints these fields unconditionally, so
			 * give them defined values instead of leaving them
			 * unassigned when there is no mount.
			 */
			__entry->dev = 0;
			__entry->mflags = 0;
			__entry->opstate = 0;
			__entry->sbflags = 0;
		}
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d m_features 0x%llx opstate (%s) s_flags 0x%lx caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->mflags,
		  __print_flags(__entry->opstate, " | ", XFS_OPSTATE_STRINGS),
		  __entry->sbflags,
		  __entry->caller_ip)
);
/* Instantiate a tracepoint called @name from xfs_fs_class. */
#define DEFINE_FS_EVENT(name) \
DEFINE_EVENT(xfs_fs_class, name, \
	TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
	TP_ARGS(mp, caller_ip))

DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_push);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue);
DEFINE_FS_EVENT(xfs_inodegc_throttle);
DEFINE_FS_EVENT(xfs_fs_sync_fs);
DEFINE_FS_EVENT(xfs_blockgc_start);
DEFINE_FS_EVENT(xfs_blockgc_stop);
DEFINE_FS_EVENT(xfs_blockgc_worker);
DEFINE_FS_EVENT(xfs_blockgc_flush_all);
2021-08-06 18:05:39 +00:00
2021-08-06 18:05:43 +00:00
/*
 * Tracepoint recording the device, sc->nr_to_scan and the caller's address.
 */
TRACE_EVENT(xfs_inodegc_shrinker_scan,
	TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
		 void *caller_ip),
	TP_ARGS(mp, sc, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long, nr_to_scan)
		__field(void *, caller_ip)
	),
	TP_fast_assign(
		__entry->caller_ip = caller_ip;
		__entry->nr_to_scan = sc->nr_to_scan;
		__entry->dev = mp->m_super->s_dev;
	),
	TP_printk(" dev %d:%d nr_to_scan %lu caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->nr_to_scan,
		  __entry->caller_ip)
);
2013-11-01 04:27:19 +00:00
DECLARE_EVENT_CLASS ( xfs_ag_class ,
2024-11-04 04:18:36 +00:00
TP_PROTO ( const struct xfs_perag * pag ) ,
TP_ARGS ( pag ) ,
2013-11-01 04:27:19 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2013-11-01 04:27:19 +00:00
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x " ,
2013-11-01 04:27:19 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno )
) ;
# define DEFINE_AG_EVENT(name) \
DEFINE_EVENT ( xfs_ag_class , name , \
2024-11-04 04:18:36 +00:00
TP_PROTO ( const struct xfs_perag * pag ) , \
TP_ARGS ( pag ) )
2013-11-01 04:27:19 +00:00
DEFINE_AG_EVENT ( xfs_read_agf ) ;
DEFINE_AG_EVENT ( xfs_alloc_read_agf ) ;
DEFINE_AG_EVENT ( xfs_read_agi ) ;
DEFINE_AG_EVENT ( xfs_ialloc_read_agi ) ;
2009-12-14 23:14:59 +00:00
/*
 * Tracepoint for an attribute-list walk descending through a da node entry.
 *
 * @ctx:   attribute list context being traced.
 * @btree: node entry; its hashval and before fields are converted from
 *         big-endian before being recorded.
 *
 * Records the same context fields as xfs_attr_list_class plus the node
 * entry's hashval and before values.
 */
TRACE_EVENT(xfs_attr_list_node_descend,
	TP_PROTO(struct xfs_attr_list_context *ctx,
		 struct xfs_da_node_entry *btree),
	TP_ARGS(ctx, btree),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u32, hashval)
		__field(u32, blkno)
		__field(u32, offset)
		__field(void *, buffer)
		__field(int, bufsize)
		__field(int, count)
		__field(int, firstu)
		__field(int, dupcnt)
		__field(unsigned int, attr_filter)
		__field(u32, bt_hashval)
		__field(u32, bt_before)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
		__entry->ino = ctx->dp->i_ino;
		__entry->hashval = ctx->cursor.hashval;
		__entry->blkno = ctx->cursor.blkno;
		__entry->offset = ctx->cursor.offset;
		__entry->buffer = ctx->buffer;
		__entry->bufsize = ctx->bufsize;
		__entry->count = ctx->count;
		__entry->firstu = ctx->firstu;
		/*
		 * dupcnt is declared above and printed below; without this
		 * assignment the event would report an unassigned value.
		 * NOTE(review): assumes struct xfs_attr_list_context has a
		 * dupcnt member -- confirm against its definition.
		 */
		__entry->dupcnt = ctx->dupcnt;
		__entry->attr_filter = ctx->attr_filter;
		__entry->bt_hashval = be32_to_cpu(btree->hashval);
		__entry->bt_before = be32_to_cpu(btree->before);
	),
	TP_printk(" dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
		  " buffer %p size %u count %u firstu %u filter %s "
		  " node hashval %u, node before %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->hashval,
		  __entry->blkno,
		  __entry->offset,
		  __entry->dupcnt,
		  __entry->buffer,
		  __entry->bufsize,
		  __entry->count,
		  __entry->firstu,
		  __print_flags(__entry->attr_filter, " | ",
				XFS_ATTR_FILTER_FLAGS),
		  __entry->bt_hashval,
		  __entry->bt_before)
);
2009-12-21 14:03:03 +00:00
DECLARE_EVENT_CLASS ( xfs_bmap_class ,
2017-11-03 17:34:43 +00:00
TP_PROTO ( struct xfs_inode * ip , struct xfs_iext_cursor * cur , int state ,
2009-12-21 14:03:03 +00:00
unsigned long caller_ip ) ,
2017-11-03 17:34:43 +00:00
TP_ARGS ( ip , cur , state , caller_ip ) ,
2009-12-21 14:03:03 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2019-10-24 20:26:59 +00:00
__field ( void * , leaf )
__field ( int , pos )
2009-12-21 14:03:03 +00:00
__field ( xfs_fileoff_t , startoff )
__field ( xfs_fsblock_t , startblock )
__field ( xfs_filblks_t , blockcount )
__field ( xfs_exntst_t , state )
__field ( int , bmap_state )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
2016-10-03 16:11:32 +00:00
struct xfs_ifork * ifp ;
2009-12-21 14:03:03 +00:00
struct xfs_bmbt_irec r ;
2016-10-03 16:11:32 +00:00
ifp = xfs_iext_state_to_fork ( ip , state ) ;
2017-11-03 17:34:43 +00:00
xfs_iext_get_extent ( ifp , cur , & r ) ;
2009-12-21 14:03:03 +00:00
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2017-11-03 17:34:46 +00:00
__entry - > leaf = cur - > leaf ;
__entry - > pos = cur - > pos ;
2009-12-21 14:03:03 +00:00
__entry - > startoff = r . br_startoff ;
__entry - > startblock = r . br_startblock ;
__entry - > blockcount = r . br_blockcount ;
__entry - > state = r . br_state ;
__entry - > bmap_state = state ;
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 19:43:36 +00:00
TP_printk ( " dev %d:%d ino 0x%llx state %s cur %p/%d "
2021-08-17 20:00:13 +00:00
" fileoff 0x%llx startblock 0x%llx fsbcount 0x%llx flag %d caller %pS " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > bmap_state , " | " , XFS_BMAP_EXT_FLAGS ) ,
2017-11-03 17:34:46 +00:00
__entry - > leaf ,
__entry - > pos ,
2009-12-21 14:03:03 +00:00
__entry - > startoff ,
2017-06-16 18:00:05 +00:00
( int64_t ) __entry - > startblock ,
2009-12-21 14:03:03 +00:00
__entry - > blockcount ,
__entry - > state ,
( char * ) __entry - > caller_ip )
)
2009-12-14 23:14:59 +00:00
# define DEFINE_BMAP_EVENT(name) \
2009-12-21 14:03:03 +00:00
DEFINE_EVENT ( xfs_bmap_class , name , \
2017-11-03 17:34:43 +00:00
TP_PROTO ( struct xfs_inode * ip , struct xfs_iext_cursor * cur , int state , \
2009-12-14 23:14:59 +00:00
unsigned long caller_ip ) , \
2017-11-03 17:34:43 +00:00
TP_ARGS ( ip , cur , state , caller_ip ) )
2017-11-03 17:34:46 +00:00
DEFINE_BMAP_EVENT ( xfs_iext_insert ) ;
2009-12-14 23:14:59 +00:00
DEFINE_BMAP_EVENT ( xfs_iext_remove ) ;
DEFINE_BMAP_EVENT ( xfs_bmap_pre_update ) ;
DEFINE_BMAP_EVENT ( xfs_bmap_post_update ) ;
2017-10-19 18:06:29 +00:00
DEFINE_BMAP_EVENT ( xfs_read_extent ) ;
DEFINE_BMAP_EVENT ( xfs_write_extent ) ;
2009-12-14 23:14:59 +00:00
2009-12-21 14:03:03 +00:00
/*
 * Event class recording a snapshot of an xfs_buf: device, disk address,
 * length, hold and pin counts, the b_sema count, the buffer flags, the
 * b_ops pointer and the caller's address.
 */
DECLARE_EVENT_CLASS(xfs_buf_class,
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
	TP_ARGS(bp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, bno)
		__field(int, nblks)
		__field(int, hold)
		__field(int, pincount)
		__field(unsigned, lockval)
		__field(unsigned, flags)
		__field(unsigned long, caller_ip)
		__field(const void *, buf_ops)
	),
	TP_fast_assign(
		/* Caller-supplied value. */
		__entry->caller_ip = caller_ip;

		/* Where the buffer lives. */
		__entry->dev = bp->b_target->bt_dev;
		__entry->bno = xfs_buf_daddr(bp);
		__entry->nblks = bp->b_length;

		/* Reference, pin and lock state. */
		__entry->hold = atomic_read(&bp->b_hold);
		__entry->pincount = atomic_read(&bp->b_pin_count);
		__entry->lockval = bp->b_sema.count;

		__entry->flags = bp->b_flags;
		__entry->buf_ops = bp->b_ops;
	),
	TP_printk(" dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
		  " lock %d flags %s bufops %pS caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->bno,
		  __entry->nblks,
		  __entry->hold,
		  __entry->pincount,
		  __entry->lockval,
		  __print_flags(__entry->flags, " | ", XFS_BUF_FLAGS),
		  __entry->buf_ops,
		  (void *)__entry->caller_ip)
)
2009-12-21 14:03:03 +00:00
/* Instantiate a tracepoint called @name from xfs_buf_class. */
#define DEFINE_BUF_EVENT(name) \
DEFINE_EVENT(xfs_buf_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
	TP_ARGS(bp, caller_ip))

DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_submit);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_trylock_fail);
DEFINE_BUF_EVENT(xfs_buf_trylock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
xfs: on-stack delayed write buffer lists
Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.
This is now easily doable given that we have very few places left that write
delwri buffers:
- log recovery:
Only done at mount time, and already forcing out the buffers
synchronously using xfs_flush_buftarg
- quotacheck:
Same story.
- dquot reclaim:
Writes out dirty dquots on the LRU under memory pressure. We might
want to look into doing more of this via xfsaild, but it's already
more optimal than the synchronous inode reclaim that writes each
buffer synchronously.
- xfsaild:
This is the main beneficiary of the change. By keeping a local list
of buffers to write we reduce latency of writing out buffers, and
more importably we can remove all the delwri list promotions which
were hitting the buffer cache hard under sustained metadata loads.
The implementation is very straight forward - xfs_buf_delwri_queue now gets
a new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwi_submit or
xfs_buf_delwi_submit_nowait. Buffers that already are on a delwri list are
skipped in xfs_buf_delwri_queue, assuming they already are on another delwri
list. The biggest change to pass down the buffer list was done to the AIL
pushing. Now that we operate on buffers the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item, and if possible add the buffer that needs writeback to the buffer list.
This leads to much simpler code than the previous split but requires the
individual IOP_PUSH instances to unlock and reacquire the AIL around calls
to blocking routines.
Given that xfsailds now also handle writing out buffers, the conditions for
log forcing and the sleep times needed some small changes. The most
important one is that we consider an AIL busy as long we still have buffers
to push, and the other one is that we do increment the pushed LSN for
buffers that are under flushing at this moment, but still count them towards
the stuck items for restart purposes. Without this we could hammer on stuck
items without ever forcing the log and not make progress under heavy random
delete workloads on fast flash storage devices.
[ Dave Chinner:
- rebase on previous patches.
- improved comments for XBF_DELWRI_Q handling
- fix XBF_ASYNC handling in queue submission (test 106 failure)
- rename delwri submit function buffer list parameters for clarity
- xfs_efd_item_push() should return XFS_ITEM_PINNED ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2012-04-23 05:58:39 +00:00
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_drain_buftarg);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);

/* Not really buffer traces, but the buf provides useful information. */
DEFINE_BUF_EVENT(xfs_btree_corrupt);
DEFINE_BUF_EVENT(xfs_reset_dqcounts);
/* pass flags explicitly */
2009-12-21 14:03:03 +00:00
DECLARE_EVENT_CLASS ( xfs_buf_flags_class ,
TP_PROTO ( struct xfs_buf * bp , unsigned flags , unsigned long caller_ip ) ,
TP_ARGS ( bp , flags , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_daddr_t , bno )
2021-08-17 20:15:53 +00:00
__field ( unsigned int , length )
2009-12-21 14:03:03 +00:00
__field ( int , hold )
__field ( int , pincount )
__field ( unsigned , lockval )
__field ( unsigned , flags )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = bp - > b_target - > bt_dev ;
2021-08-19 01:47:05 +00:00
__entry - > bno = xfs_buf_daddr ( bp ) ;
2021-08-17 20:15:53 +00:00
__entry - > length = bp - > b_length ;
2009-12-21 14:03:03 +00:00
__entry - > flags = flags ;
__entry - > hold = atomic_read ( & bp - > b_hold ) ;
__entry - > pincount = atomic_read ( & bp - > b_pin_count ) ;
2011-07-08 12:36:19 +00:00
__entry - > lockval = bp - > b_sema . count ;
2009-12-21 14:03:03 +00:00
__entry - > caller_ip = caller_ip ;
) ,
2021-08-17 20:15:53 +00:00
TP_printk ( " dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
2018-01-09 19:46:05 +00:00
" lock %d flags %s caller %pS " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
( unsigned long long ) __entry - > bno ,
2021-08-17 20:15:53 +00:00
__entry - > length ,
2009-12-21 14:03:03 +00:00
__entry - > hold ,
__entry - > pincount ,
__entry - > lockval ,
__print_flags ( __entry - > flags , " | " , XFS_BUF_FLAGS ) ,
( void * ) __entry - > caller_ip )
2009-12-14 23:14:59 +00:00
)
2009-12-21 14:03:03 +00:00
/* Instantiate a tracepoint called @name from xfs_buf_flags_class. */
#define DEFINE_BUF_FLAGS_EVENT(name) \
DEFINE_EVENT(xfs_buf_flags_class, name, \
	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
	TP_ARGS(bp, flags, caller_ip))

DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
TRACE_EVENT ( xfs_buf_ioerror ,
2018-01-08 18:51:02 +00:00
TP_PROTO ( struct xfs_buf * bp , int error , xfs_failaddr_t caller_ip ) ,
2009-12-14 23:14:59 +00:00
TP_ARGS ( bp , error , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_daddr_t , bno )
2021-08-17 20:15:53 +00:00
__field ( unsigned int , length )
2009-12-14 23:14:59 +00:00
__field ( unsigned , flags )
__field ( int , hold )
__field ( int , pincount )
__field ( unsigned , lockval )
__field ( int , error )
2018-01-08 18:51:02 +00:00
__field ( xfs_failaddr_t , caller_ip )
2009-12-14 23:14:59 +00:00
) ,
TP_fast_assign (
__entry - > dev = bp - > b_target - > bt_dev ;
2021-08-19 01:47:05 +00:00
__entry - > bno = xfs_buf_daddr ( bp ) ;
2021-08-17 20:15:53 +00:00
__entry - > length = bp - > b_length ;
2009-12-14 23:14:59 +00:00
__entry - > hold = atomic_read ( & bp - > b_hold ) ;
__entry - > pincount = atomic_read ( & bp - > b_pin_count ) ;
2011-07-08 12:36:19 +00:00
__entry - > lockval = bp - > b_sema . count ;
2009-12-14 23:14:59 +00:00
__entry - > error = error ;
__entry - > flags = bp - > b_flags ;
__entry - > caller_ip = caller_ip ;
) ,
2021-08-17 20:15:53 +00:00
TP_printk ( " dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
2018-01-08 18:51:02 +00:00
" lock %d error %d flags %s caller %pS " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
( unsigned long long ) __entry - > bno ,
2021-08-17 20:15:53 +00:00
__entry - > length ,
2009-12-14 23:14:59 +00:00
__entry - > hold ,
__entry - > pincount ,
__entry - > lockval ,
__entry - > error ,
__print_flags ( __entry - > flags , " | " , XFS_BUF_FLAGS ) ,
( void * ) __entry - > caller_ip )
) ;
2009-12-21 14:03:03 +00:00
/*
 * Event class for buffer log item tracepoints.  Each record combines state
 * read from the buf log item itself (bli_* fields and the embedded log
 * item's li_flags) with state read from the buffer it points at.
 */
DECLARE_EVENT_CLASS(xfs_buf_item_class,
	TP_PROTO(struct xfs_buf_log_item *bip),
	TP_ARGS(bip),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_daddr_t, buf_bno)
		__field(unsigned int, buf_len)
		__field(int, buf_hold)
		__field(int, buf_pincount)
		__field(int, buf_lockval)
		__field(unsigned, buf_flags)
		__field(unsigned, bli_recur)
		__field(int, bli_refcount)
		__field(unsigned, bli_flags)
		__field(unsigned long, li_flags)
	),

	TP_fast_assign(
		/* The buffer this log item is attached to. */
		struct xfs_buf *bp = bip->bli_buf;

		/* Buffer-side state. */
		__entry->dev = bp->b_target->bt_dev;
		__entry->buf_bno = xfs_buf_daddr(bp);
		__entry->buf_len = bp->b_length;
		__entry->buf_flags = bp->b_flags;
		__entry->buf_hold = atomic_read(&bp->b_hold);
		__entry->buf_pincount = atomic_read(&bp->b_pin_count);
		__entry->buf_lockval = bp->b_sema.count;

		/* Log-item-side state. */
		__entry->bli_flags = bip->bli_flags;
		__entry->bli_recur = bip->bli_recur;
		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
		__entry->li_flags = bip->bli_item.li_flags;
	),

	TP_printk(" dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
		  " lock %d flags %s recur %d refcount %d bliflags %s "
		  " liflags %s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->buf_bno,
		  __entry->buf_len,
		  __entry->buf_hold, __entry->buf_pincount,
		  __entry->buf_lockval,
		  __print_flags(__entry->buf_flags, " | ", XFS_BUF_FLAGS),
		  __entry->bli_recur, __entry->bli_refcount,
		  __print_flags(__entry->bli_flags, " | ", XFS_BLI_FLAGS),
		  __print_flags(__entry->li_flags, " | ", XFS_LI_FLAGS))
)
2009-12-21 14:03:03 +00:00
/*
 * Instantiate a tracepoint from xfs_buf_item_class; the only argument of
 * every such event is the buffer log item.
 */
#define DEFINE_BUF_ITEM_EVENT(name) \
	DEFINE_EVENT(xfs_buf_item_class, name, \
		TP_PROTO(struct xfs_buf_log_item *bip), \
		TP_ARGS(bip))

/* xfs_buf_item_* events. */
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);

/* xfs_trans_* events that report a buffer log item. */
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
xfs: launder in-memory btree buffers before transaction commit
As we've noted in various places, all current users of in-memory btrees
are online fsck. Online fsck only stages a btree long enough to rebuild
an ondisk data structure, which means that the in-memory btree is
ephemeral. Furthermore, if we encounter /any/ errors while updating an
in-memory btree, all we do is tear down all the staged data and return
an errno to userspace. In-memory btrees need not be transactional, so
their buffers should not be committed to the ondisk log, nor should they
be checkpointed by the AIL. That's just as well since the ephemeral
nature of the btree means that the buftarg and the buffers may disappear
quickly anyway.
Therefore, we need a way to launder the btree buffers that get attached
to the transaction by the generic btree code. Because the buffers are
directly mapped to backing file pages, there's no need to bwrite them
back to the tmpfs file. All we need to do is clean enough of the buffer
log item state so that the bli can be detached from the buffer, remove
the bli from the transaction's log item list, and reset the transaction
dirty state as if the laundered items had never been there.
For simplicity, create xfbtree transaction commit and cancel helpers
that launder the in-memory btree buffers for callers. Once laundered,
call the write verifier on non-stale buffers to avoid integrity issues,
or punch a hole in the backing file for stale buffers.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2024-02-22 20:43:36 +00:00
/* Further xfs_trans_* events instantiated from xfs_buf_item_class. */
DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
2014-04-22 21:11:52 +00:00
DECLARE_EVENT_CLASS ( xfs_filestream_class ,
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , xfs_ino_t ino ) ,
2023-02-12 22:14:56 +00:00
TP_ARGS ( pag , ino ) ,
2014-04-22 21:11:52 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_agnumber_t , agno )
__field ( int , streams )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
2018-04-09 17:23:39 +00:00
__entry - > ino = ino ;
2024-11-04 04:18:38 +00:00
__entry - > agno = pag_agno ( pag ) ;
2023-02-12 22:14:56 +00:00
__entry - > streams = atomic_read ( & pag - > pagf_fstrms ) ;
2014-04-22 21:11:52 +00:00
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d ino 0x%llx agno 0x%x streams %d " ,
2014-04-22 21:11:52 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > agno ,
__entry - > streams )
)
# define DEFINE_FILESTREAM_EVENT(name) \
DEFINE_EVENT ( xfs_filestream_class , name , \
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , xfs_ino_t ino ) , \
2023-02-12 22:14:56 +00:00
TP_ARGS ( pag , ino ) )
2014-04-22 21:11:52 +00:00
DEFINE_FILESTREAM_EVENT ( xfs_filestream_free ) ;
DEFINE_FILESTREAM_EVENT ( xfs_filestream_lookup ) ;
DEFINE_FILESTREAM_EVENT ( xfs_filestream_scan ) ;
TRACE_EVENT ( xfs_filestream_pick ,
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , xfs_ino_t ino ) ,
2024-10-23 13:37:22 +00:00
TP_ARGS ( pag , ino ) ,
2014-04-22 21:11:52 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_agnumber_t , agno )
__field ( int , streams )
__field ( xfs_extlen_t , free )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
2023-02-12 22:14:56 +00:00
__entry - > ino = ino ;
2024-11-04 04:18:38 +00:00
__entry - > agno = pag_agno ( pag ) ;
2024-10-23 13:37:22 +00:00
__entry - > streams = atomic_read ( & pag - > pagf_fstrms ) ;
__entry - > free = pag - > pagf_freeblks ;
2014-04-22 21:11:52 +00:00
) ,
2023-02-12 22:14:56 +00:00
TP_printk ( " dev %d:%d ino 0x%llx agno 0x%x streams %d free %d " ,
2014-04-22 21:11:52 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > agno ,
__entry - > streams ,
2023-02-12 22:14:56 +00:00
__entry - > free )
2014-04-22 21:11:52 +00:00
) ;
2009-12-21 14:03:03 +00:00
/*
 * Event class for inode lock tracepoints.  Records the device and inode
 * number of the inode, the lock flags passed in (decoded on output with
 * XFS_LOCK_FLAGS) and the caller_ip address.
 */
DECLARE_EVENT_CLASS(xfs_lock_class,
	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
		 unsigned long caller_ip),
	TP_ARGS(ip, lock_flags, caller_ip),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, lock_flags)
		__field(unsigned long, caller_ip)
	),

	TP_fast_assign(
		/* Arguments copied through unchanged. */
		__entry->lock_flags = lock_flags;
		__entry->caller_ip = caller_ip;

		/* Inode identity. */
		__entry->ino = ip->i_ino;
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
	),

	TP_printk(" dev %d:%d ino 0x%llx flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __print_flags(__entry->lock_flags, " | ", XFS_LOCK_FLAGS),
		  (void *)__entry->caller_ip)
)
2009-12-14 23:14:59 +00:00
# define DEFINE_LOCK_EVENT(name) \
2009-12-21 14:03:03 +00:00
DEFINE_EVENT ( xfs_lock_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_inode * ip , unsigned lock_flags , \
unsigned long caller_ip ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( ip , lock_flags , caller_ip ) )
2009-12-14 23:14:59 +00:00
DEFINE_LOCK_EVENT ( xfs_ilock ) ;
DEFINE_LOCK_EVENT ( xfs_ilock_nowait ) ;
DEFINE_LOCK_EVENT ( xfs_ilock_demote ) ;
DEFINE_LOCK_EVENT ( xfs_iunlock ) ;
2010-06-24 01:57:09 +00:00
DECLARE_EVENT_CLASS ( xfs_inode_class ,
2009-12-21 14:03:03 +00:00
TP_PROTO ( struct xfs_inode * ip ) ,
TP_ARGS ( ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2021-08-06 18:05:39 +00:00
__field ( unsigned long , iflags )
2009-12-21 14:03:03 +00:00
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-08-06 18:05:39 +00:00
__entry - > iflags = ip - > i_flags ;
2009-12-21 14:03:03 +00:00
) ,
2021-08-06 18:05:39 +00:00
TP_printk ( " dev %d:%d ino 0x%llx iflags 0x%lx " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2021-08-06 18:05:39 +00:00
__entry - > ino ,
__entry - > iflags )
2009-12-21 14:03:03 +00:00
)
2010-06-24 01:57:09 +00:00
# define DEFINE_INODE_EVENT(name) \
DEFINE_EVENT ( xfs_inode_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_inode * ip ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( ip ) )
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_iget_skip ) ;
2021-06-18 18:57:05 +00:00
DEFINE_INODE_EVENT ( xfs_iget_recycle ) ;
DEFINE_INODE_EVENT ( xfs_iget_recycle_fail ) ;
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_iget_hit ) ;
DEFINE_INODE_EVENT ( xfs_iget_miss ) ;
2009-12-14 23:14:59 +00:00
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_getattr ) ;
DEFINE_INODE_EVENT ( xfs_setattr ) ;
DEFINE_INODE_EVENT ( xfs_readlink ) ;
2013-06-17 20:35:57 +00:00
DEFINE_INODE_EVENT ( xfs_inactive_symlink ) ;
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_alloc_file_space ) ;
DEFINE_INODE_EVENT ( xfs_free_file_space ) ;
2014-04-14 08:15:11 +00:00
DEFINE_INODE_EVENT ( xfs_zero_file_space ) ;
2014-02-23 23:58:19 +00:00
DEFINE_INODE_EVENT ( xfs_collapse_file_space ) ;
2015-03-25 04:08:56 +00:00
DEFINE_INODE_EVENT ( xfs_insert_file_space ) ;
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_readdir ) ;
2010-07-20 07:54:41 +00:00
# ifdef CONFIG_XFS_POSIX_ACL
2011-07-23 15:37:31 +00:00
DEFINE_INODE_EVENT ( xfs_get_acl ) ;
2010-07-20 07:54:41 +00:00
# endif
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_vm_bmap ) ;
DEFINE_INODE_EVENT ( xfs_file_ioctl ) ;
DEFINE_INODE_EVENT ( xfs_file_compat_ioctl ) ;
DEFINE_INODE_EVENT ( xfs_ioctl_setattr ) ;
2011-10-02 14:25:16 +00:00
DEFINE_INODE_EVENT ( xfs_dir_fsync ) ;
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_file_fsync ) ;
DEFINE_INODE_EVENT ( xfs_destroy_inode ) ;
2012-06-06 21:01:28 +00:00
DEFINE_INODE_EVENT ( xfs_update_time ) ;
2010-06-24 01:57:09 +00:00
DEFINE_INODE_EVENT ( xfs_dquot_dqalloc ) ;
DEFINE_INODE_EVENT ( xfs_dquot_dqdetach ) ;
2012-11-06 14:50:38 +00:00
DEFINE_INODE_EVENT ( xfs_inode_set_eofblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_clear_eofblocks_tag ) ;
2012-11-06 14:50:42 +00:00
DEFINE_INODE_EVENT ( xfs_inode_free_eofblocks_invalid ) ;
2016-10-03 16:11:46 +00:00
DEFINE_INODE_EVENT ( xfs_inode_set_cowblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_clear_cowblocks_tag ) ;
DEFINE_INODE_EVENT ( xfs_inode_free_cowblocks_invalid ) ;
2021-08-06 18:05:39 +00:00
DEFINE_INODE_EVENT ( xfs_inode_set_reclaimable ) ;
DEFINE_INODE_EVENT ( xfs_inode_reclaiming ) ;
DEFINE_INODE_EVENT ( xfs_inode_set_need_inactive ) ;
DEFINE_INODE_EVENT ( xfs_inode_inactivating ) ;
2012-11-06 14:50:38 +00:00
2018-12-18 22:32:29 +00:00
/*
 * Every enum value handed to ftrace's __print_symbolic must be wrapped in
 * TRACE_DEFINE_ENUM so that the value can be encoded in the ftrace ring
 * buffer.  This requirement is only mentioned in the ftrace sample code.
 */
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
2024-10-29 15:11:57 +00:00
/*
 * Event class for page fault tracepoints.  Records the device and inode
 * number of the inode plus the "order" value passed by the caller.
 */
DECLARE_EVENT_CLASS(xfs_fault_class,
	TP_PROTO(struct xfs_inode *ip, unsigned int order),
	TP_ARGS(ip, order),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, order)
	),

	TP_fast_assign(
		__entry->order = order;
		__entry->ino = ip->i_ino;
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
	),

	TP_printk(" dev %d:%d ino 0x%llx order %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino, __entry->order)
)
2024-10-29 15:11:57 +00:00
/* Instantiate a tracepoint from xfs_fault_class (inode + order). */
#define DEFINE_FAULT_EVENT(name) \
	DEFINE_EVENT(xfs_fault_class, name, \
		TP_PROTO(struct xfs_inode *ip, unsigned int order), \
		TP_ARGS(ip, order))

/* Events that share the xfs_fault_class record layout. */
DEFINE_FAULT_EVENT(xfs_read_fault);
DEFINE_FAULT_EVENT(xfs_write_fault);
2010-06-24 01:57:09 +00:00
DECLARE_EVENT_CLASS ( xfs_iref_class ,
2009-12-21 14:03:03 +00:00
TP_PROTO ( struct xfs_inode * ip , unsigned long caller_ip ) ,
TP_ARGS ( ip , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , count )
2010-03-08 00:24:07 +00:00
__field ( int , pincount )
2009-12-21 14:03:03 +00:00
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
__entry - > count = atomic_read ( & VFS_I ( ip ) - > i_count ) ;
2010-03-08 00:24:07 +00:00
__entry - > pincount = atomic_read ( & ip - > i_pincount ) ;
2009-12-21 14:03:03 +00:00
__entry - > caller_ip = caller_ip ;
) ,
2018-01-09 19:46:05 +00:00
TP_printk ( " dev %d:%d ino 0x%llx count %d pincount %d caller %pS " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > count ,
2010-03-08 00:24:07 +00:00
__entry - > pincount ,
2009-12-21 14:03:03 +00:00
( char * ) __entry - > caller_ip )
2013-03-18 14:51:48 +00:00
)
/*
 * xfs_iomap_prealloc_size: records the device and inode number together
 * with the blocks, shift and writeio_blocks values passed by the caller.
 * writeio_blocks is labelled "m_allocsize_blocks" in the output.
 */
TRACE_EVENT(xfs_iomap_prealloc_size,
	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift,
		 unsigned int writeio_blocks),
	TP_ARGS(ip, blocks, shift, writeio_blocks),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsblock_t, blocks)
		__field(int, shift)
		__field(unsigned int, writeio_blocks)
	),

	TP_fast_assign(
		/* Arguments copied through unchanged. */
		__entry->blocks = blocks;
		__entry->shift = shift;
		__entry->writeio_blocks = writeio_blocks;

		/* Inode identity. */
		__entry->ino = ip->i_ino;
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
	),

	TP_printk(" dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
		  " m_allocsize_blocks %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->blocks,
		  __entry->shift,
		  __entry->writeio_blocks)
)
2015-05-28 23:18:32 +00:00
TRACE_EVENT ( xfs_irec_merge_pre ,
2024-11-04 04:18:33 +00:00
TP_PROTO ( const struct xfs_perag * pag ,
const struct xfs_inobt_rec_incore * rec ,
const struct xfs_inobt_rec_incore * nrec ) ,
TP_ARGS ( pag , rec , nrec ) ,
2015-05-28 23:18:32 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agino_t , agino )
__field ( uint16_t , holemask )
__field ( xfs_agino_t , nagino )
__field ( uint16_t , nholemask )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2024-11-04 04:18:33 +00:00
__entry - > agino = rec - > ir_startino ;
__entry - > holemask = rec - > ir_holemask ;
__entry - > nagino = nrec - > ir_startino ;
__entry - > nholemask = nrec - > ir_holemask ;
2015-05-28 23:18:32 +00:00
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x agino 0x%x holemask 0x%x new_agino 0x%x new_holemask 0x%x " ,
2021-08-17 16:20:27 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agino ,
__entry - > holemask ,
__entry - > nagino ,
2015-05-28 23:18:32 +00:00
__entry - > nholemask )
)
TRACE_EVENT ( xfs_irec_merge_post ,
2024-11-04 04:18:33 +00:00
TP_PROTO ( const struct xfs_perag * pag ,
const struct xfs_inobt_rec_incore * nrec ) ,
TP_ARGS ( pag , nrec ) ,
2015-05-28 23:18:32 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agino_t , agino )
__field ( uint16_t , holemask )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2024-11-04 04:18:33 +00:00
__entry - > agino = nrec - > ir_startino ;
__entry - > holemask = nrec - > ir_holemask ;
2015-05-28 23:18:32 +00:00
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x agino 0x%x holemask 0x%x " ,
2021-08-17 16:20:27 +00:00
MAJOR ( __entry - > dev ) ,
MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agino ,
2015-05-28 23:18:32 +00:00
__entry - > holemask )
)
2010-06-24 01:57:09 +00:00
# define DEFINE_IREF_EVENT(name) \
DEFINE_EVENT ( xfs_iref_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_inode * ip , unsigned long caller_ip ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( ip , caller_ip ) )
2010-06-24 01:57:09 +00:00
DEFINE_IREF_EVENT ( xfs_irele ) ;
DEFINE_IREF_EVENT ( xfs_inode_pin ) ;
DEFINE_IREF_EVENT ( xfs_inode_unpin ) ;
DEFINE_IREF_EVENT ( xfs_inode_unpin_nowait ) ;
DECLARE_EVENT_CLASS ( xfs_namespace_class ,
2022-03-09 18:16:09 +00:00
TP_PROTO ( struct xfs_inode * dp , const struct xfs_name * name ) ,
2010-06-24 01:57:09 +00:00
TP_ARGS ( dp , name ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , dp_ino )
2012-02-28 11:01:40 +00:00
__field ( int , namelen )
2010-06-24 01:57:09 +00:00
__dynamic_array ( char , name , name - > len )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( dp ) - > i_sb - > s_dev ;
__entry - > dp_ino = dp - > i_ino ;
2012-02-28 11:01:40 +00:00
__entry - > namelen = name - > len ;
2010-06-24 01:57:09 +00:00
memcpy ( __get_str ( name ) , name - > name , name - > len ) ;
) ,
2012-02-28 11:01:40 +00:00
TP_printk ( " dev %d:%d dp ino 0x%llx name %.*s " ,
2010-06-24 01:57:09 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > dp_ino ,
2012-02-28 11:01:40 +00:00
__entry - > namelen ,
2010-06-24 01:57:09 +00:00
__get_str ( name ) )
)
# define DEFINE_NAMESPACE_EVENT(name) \
DEFINE_EVENT ( xfs_namespace_class , name , \
2022-03-09 18:16:09 +00:00
TP_PROTO ( struct xfs_inode * dp , const struct xfs_name * name ) , \
2010-06-24 01:57:09 +00:00
TP_ARGS ( dp , name ) )
DEFINE_NAMESPACE_EVENT ( xfs_remove ) ;
DEFINE_NAMESPACE_EVENT ( xfs_link ) ;
DEFINE_NAMESPACE_EVENT ( xfs_lookup ) ;
DEFINE_NAMESPACE_EVENT ( xfs_create ) ;
DEFINE_NAMESPACE_EVENT ( xfs_symlink ) ;
2010-03-08 00:24:07 +00:00
2010-06-24 01:57:09 +00:00
/*
 * xfs_rename: records the device (taken from the source directory), the
 * inode numbers of the source and target directories, and copies of the
 * source and target names.  Both names are copied with memcpy() for their
 * stated length and printed with an explicit length (%.*s).
 */
TRACE_EVENT(xfs_rename,
	TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
		 struct xfs_name *src_name, struct xfs_name *target_name),
	TP_ARGS(src_dp, target_dp, src_name, target_name),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, src_dp_ino)
		__field(xfs_ino_t, target_dp_ino)
		__field(int, src_namelen)
		__field(int, target_namelen)
		__dynamic_array(char, src_name, src_name->len)
		__dynamic_array(char, target_name, target_name->len)
	),

	TP_fast_assign(
		__entry->dev = VFS_I(src_dp)->i_sb->s_dev;

		/* Source side. */
		__entry->src_dp_ino = src_dp->i_ino;
		__entry->src_namelen = src_name->len;
		memcpy(__get_str(src_name), src_name->name, src_name->len);

		/* Target side. */
		__entry->target_dp_ino = target_dp->i_ino;
		__entry->target_namelen = target_name->len;
		memcpy(__get_str(target_name), target_name->name,
		       target_name->len);
	),

	TP_printk(" dev %d:%d src dp ino 0x%llx target dp ino 0x%llx "
		  " src name %.*s target name %.*s ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->src_dp_ino,
		  __entry->target_dp_ino,
		  __entry->src_namelen, __get_str(src_name),
		  __entry->target_namelen, __get_str(target_name))
)
2009-12-14 23:14:59 +00:00
2009-12-21 14:03:03 +00:00
/*
 * Event class for dquot tracepoints.  Records the dquot's id, type, flags
 * and reference count, and for each of the three resources (blocks, realtime
 * blocks, inodes) the reserved amount, the count and the hard/soft limits.
 * Type and flags are decoded on output with XFS_DQTYPE_STRINGS and
 * XFS_DQFLAG_STRINGS; all counters are printed in hex.
 */
DECLARE_EVENT_CLASS(xfs_dquot_class,
	TP_PROTO(struct xfs_dquot *dqp),
	TP_ARGS(dqp),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(u32, id)
		__field(xfs_dqtype_t, type)
		__field(unsigned, flags)
		__field(unsigned, nrefs)
		__field(unsigned long long, res_bcount)
		__field(unsigned long long, res_rtbcount)
		__field(unsigned long long, res_icount)

		__field(unsigned long long, bcount)
		__field(unsigned long long, rtbcount)
		__field(unsigned long long, icount)

		__field(unsigned long long, blk_hardlimit)
		__field(unsigned long long, blk_softlimit)
		__field(unsigned long long, rtb_hardlimit)
		__field(unsigned long long, rtb_softlimit)
		__field(unsigned long long, ino_hardlimit)
		__field(unsigned long long, ino_softlimit)
	),

	TP_fast_assign(
		/* Dquot identity and state. */
		__entry->dev = dqp->q_mount->m_super->s_dev;
		__entry->id = dqp->q_id;
		__entry->type = dqp->q_type;
		__entry->flags = dqp->q_flags;
		__entry->nrefs = dqp->q_nrefs;

		/* Reservations. */
		__entry->res_bcount = dqp->q_blk.reserved;
		__entry->res_rtbcount = dqp->q_rtb.reserved;
		__entry->res_icount = dqp->q_ino.reserved;

		/* Counts. */
		__entry->bcount = dqp->q_blk.count;
		__entry->rtbcount = dqp->q_rtb.count;
		__entry->icount = dqp->q_ino.count;

		/* Limits. */
		__entry->blk_hardlimit = dqp->q_blk.hardlimit;
		__entry->blk_softlimit = dqp->q_blk.softlimit;
		__entry->rtb_hardlimit = dqp->q_rtb.hardlimit;
		__entry->rtb_softlimit = dqp->q_rtb.softlimit;
		__entry->ino_hardlimit = dqp->q_ino.hardlimit;
		__entry->ino_softlimit = dqp->q_ino.softlimit;
	),

	/*
	 * The format used to end in a stray ']' with no matching '[';
	 * it has been dropped so the output is well-formed.
	 */
	TP_printk(" dev %d:%d id 0x%x type %s flags %s nrefs %u "
		  " res_bc 0x%llx res_rtbc 0x%llx res_ic 0x%llx "
		  " bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
		  " rtbcnt 0x%llx rtbhardlimit 0x%llx rtbsoftlimit 0x%llx "
		  " icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->id,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __entry->nrefs,
		  __entry->res_bcount,
		  __entry->res_rtbcount,
		  __entry->res_icount,
		  __entry->bcount,
		  __entry->blk_hardlimit,
		  __entry->blk_softlimit,
		  __entry->rtbcount,
		  __entry->rtb_hardlimit,
		  __entry->rtb_softlimit,
		  __entry->icount,
		  __entry->ino_hardlimit,
		  __entry->ino_softlimit)
)
2009-12-21 14:03:03 +00:00
/*
 * Instantiate a tracepoint from xfs_dquot_class; the only argument of every
 * such event is the dquot.
 */
#define DEFINE_DQUOT_EVENT(name) \
	DEFINE_EVENT(xfs_dquot_class, name, \
		TP_PROTO(struct xfs_dquot *dqp), \
		TP_ARGS(dqp))

/* Events that share the xfs_dquot_class record layout. */
DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get);
DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
DEFINE_DQUOT_EVENT(xfs_dqget_dup);
DEFINE_DQUOT_EVENT(xfs_dqput);
DEFINE_DQUOT_EVENT(xfs_dqput_free);
DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
DEFINE_DQUOT_EVENT(xfs_dqflush_force);
DEFINE_DQUOT_EVENT(xfs_dqflush_done);
DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_before);
DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_after);
/*
 * xfs_trans_mod_dquot: records the dquot's id, type and flags together with
 * the field selector and signed delta passed by the caller.  The device is
 * taken from the transaction's mount.  The field selector is decoded on
 * output with XFS_QMOPT_FLAGS.
 */
TRACE_EVENT(xfs_trans_mod_dquot,
	TP_PROTO(struct xfs_trans *tp, struct xfs_dquot *dqp,
		 unsigned int field, int64_t delta),
	TP_ARGS(tp, dqp, field, delta),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_dqtype_t, type)
		__field(unsigned int, flags)
		__field(unsigned int, dqid)
		__field(unsigned int, field)
		__field(int64_t, delta)
	),

	TP_fast_assign(
		/* Arguments copied through unchanged. */
		__entry->field = field;
		__entry->delta = delta;

		/* Dquot identity and state. */
		__entry->dqid = dqp->q_id;
		__entry->type = dqp->q_type;
		__entry->flags = dqp->q_flags;

		__entry->dev = tp->t_mountp->m_super->s_dev;
	),

	TP_printk(" dev %d:%d dquot id 0x%x type %s flags %s field %s delta %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dqid,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __print_flags(__entry->field, " | ", XFS_QMOPT_FLAGS),
		  __entry->delta)
);
/*
 * Event class for per-transaction dquot tracking records.  Captures the id,
 * type and flags of the dquot the record points at, followed by the
 * reservation and delta fields of the record itself for blocks, realtime
 * blocks and inodes.
 */
DECLARE_EVENT_CLASS(xfs_dqtrx_class,
	TP_PROTO(struct xfs_dqtrx *qtrx),
	TP_ARGS(qtrx),

	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_dqtype_t, type)
		__field(unsigned int, flags)
		__field(u32, dqid)

		__field(uint64_t, blk_res)
		__field(int64_t, bcount_delta)
		__field(int64_t, delbcnt_delta)

		__field(uint64_t, rtblk_res)
		__field(uint64_t, rtblk_res_used)
		__field(int64_t, rtbcount_delta)
		__field(int64_t, delrtb_delta)

		__field(uint64_t, ino_res)
		__field(uint64_t, ino_res_used)
		__field(int64_t, icount_delta)
	),

	TP_fast_assign(
		/* The dquot this tracking record refers to. */
		struct xfs_dquot *dqp = qtrx->qt_dquot;

		__entry->dev = dqp->q_mount->m_super->s_dev;
		__entry->type = dqp->q_type;
		__entry->flags = dqp->q_flags;
		__entry->dqid = dqp->q_id;

		/* Data device blocks. */
		__entry->blk_res = qtrx->qt_blk_res;
		__entry->bcount_delta = qtrx->qt_bcount_delta;
		__entry->delbcnt_delta = qtrx->qt_delbcnt_delta;

		/* Realtime blocks. */
		__entry->rtblk_res = qtrx->qt_rtblk_res;
		__entry->rtblk_res_used = qtrx->qt_rtblk_res_used;
		__entry->rtbcount_delta = qtrx->qt_rtbcount_delta;
		__entry->delrtb_delta = qtrx->qt_delrtb_delta;

		/* Inodes. */
		__entry->ino_res = qtrx->qt_ino_res;
		__entry->ino_res_used = qtrx->qt_ino_res_used;
		__entry->icount_delta = qtrx->qt_icount_delta;
	),

	TP_printk(" dev %d:%d dquot id 0x%x type %s flags %s "
		  " blk_res %llu bcount_delta %lld delbcnt_delta %lld "
		  " rtblk_res %llu rtblk_res_used %llu rtbcount_delta %lld delrtb_delta %lld "
		  " ino_res %llu ino_res_used %llu icount_delta %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dqid,
		  __print_flags(__entry->type, " | ", XFS_DQTYPE_STRINGS),
		  __print_flags(__entry->flags, " | ", XFS_DQFLAG_STRINGS),
		  __entry->blk_res,
		  __entry->bcount_delta,
		  __entry->delbcnt_delta,
		  __entry->rtblk_res,
		  __entry->rtblk_res_used,
		  __entry->rtbcount_delta,
		  __entry->delrtb_delta,
		  __entry->ino_res,
		  __entry->ino_res_used,
		  __entry->icount_delta)
)
/* Instantiate a tracepoint from xfs_dqtrx_class (a single xfs_dqtrx). */
#define DEFINE_DQTRX_EVENT(name) \
	DEFINE_EVENT(xfs_dqtrx_class, name, \
		TP_PROTO(struct xfs_dqtrx *qtrx), \
		TP_ARGS(qtrx))

/* Events that share the xfs_dqtrx_class record layout. */
DEFINE_DQTRX_EVENT(xfs_trans_apply_dquot_deltas);
DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_before);
DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_after);
2009-12-14 23:14:59 +00:00
2009-12-21 14:03:03 +00:00
DECLARE_EVENT_CLASS ( xfs_loggrant_class ,
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xlog_ticket * tic ) ,
2009-12-21 14:03:03 +00:00
TP_ARGS ( log , tic ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__field ( unsigned long , tic )
2009-12-21 14:03:03 +00:00
__field ( char , ocnt )
__field ( char , cnt )
__field ( int , curr_res )
__field ( int , unit_res )
__field ( unsigned int , flags )
2010-12-21 01:02:25 +00:00
__field ( int , reserveq )
__field ( int , writeq )
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__field ( uint64_t , grant_reserve_bytes )
__field ( uint64_t , grant_write_bytes )
__field ( uint64_t , tail_space )
2009-12-21 14:03:03 +00:00
__field ( int , curr_cycle )
__field ( int , curr_block )
__field ( xfs_lsn_t , tail_lsn )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__entry - > tic = ( unsigned long ) tic ;
2009-12-21 14:03:03 +00:00
__entry - > ocnt = tic - > t_ocnt ;
__entry - > cnt = tic - > t_cnt ;
__entry - > curr_res = tic - > t_curr_res ;
__entry - > unit_res = tic - > t_unit_res ;
__entry - > flags = tic - > t_flags ;
2012-02-20 02:31:25 +00:00
__entry - > reserveq = list_empty ( & log - > l_reserve_head . waiters ) ;
__entry - > writeq = list_empty ( & log - > l_write_head . waiters ) ;
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__entry - > tail_space = READ_ONCE ( log - > l_tail_space ) ;
__entry - > grant_reserve_bytes = __entry - > tail_space +
atomic64_read ( & log - > l_reserve_head . grant ) ;
__entry - > grant_write_bytes = __entry - > tail_space +
atomic64_read ( & log - > l_write_head . grant ) ;
2009-12-21 14:03:03 +00:00
__entry - > curr_cycle = log - > l_curr_cycle ;
__entry - > curr_block = log - > l_curr_block ;
2010-12-21 01:28:39 +00:00
__entry - > tail_lsn = atomic64_read ( & log - > l_tail_lsn ) ;
2009-12-21 14:03:03 +00:00
) ,
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
TP_printk ( " dev %d:%d tic 0x%lx t_ocnt %u t_cnt %u t_curr_res %u "
" t_unit_res %u t_flags %s reserveq %s writeq %s "
" tail space %llu grant_reserve_bytes %llu "
" grant_write_bytes %llu curr_cycle %d curr_block %d "
2009-12-21 14:03:03 +00:00
" tail_cycle %d tail_block %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__entry - > tic ,
2009-12-21 14:03:03 +00:00
__entry - > ocnt ,
__entry - > cnt ,
__entry - > curr_res ,
__entry - > unit_res ,
__print_flags ( __entry - > flags , " | " , XLOG_TIC_FLAGS ) ,
2010-12-21 01:02:25 +00:00
__entry - > reserveq ? " empty " : " active " ,
__entry - > writeq ? " empty " : " active " ,
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
__entry - > tail_space ,
2009-12-21 14:03:03 +00:00
__entry - > grant_reserve_bytes ,
__entry - > grant_write_bytes ,
__entry - > curr_cycle ,
__entry - > curr_block ,
CYCLE_LSN ( __entry - > tail_lsn ) ,
BLOCK_LSN ( __entry - > tail_lsn )
)
)
2009-12-14 23:14:59 +00:00
2009-12-21 14:03:03 +00:00
/*
 * DEFINE_LOGGRANT_EVENT(name) - declare the tracepoint "name" as an instance
 * of the xfs_loggrant_class event class.
 *
 * The resulting tracepoint takes the log (struct xlog *) and the log ticket
 * (struct xlog_ticket *) as its two arguments.
 */
# define DEFINE_LOGGRANT_EVENT(name) \
DEFINE_EVENT ( xfs_loggrant_class , name , \
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xlog_ticket * tic ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( log , tic ) )
2009-12-14 23:14:59 +00:00
/*
 * Tracepoints instantiated from xfs_loggrant_class via
 * DEFINE_LOGGRANT_EVENT; each one takes (log, tic).
 */
DEFINE_LOGGRANT_EVENT ( xfs_log_umount_write ) ;
2011-11-28 08:17:36 +00:00
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_sleep ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_wake ) ;
2010-12-21 01:29:01 +00:00
DEFINE_LOGGRANT_EVENT ( xfs_log_grant_wake_up ) ;
2012-02-20 02:31:31 +00:00
DEFINE_LOGGRANT_EVENT ( xfs_log_reserve ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_reserve_exit ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_regrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_regrant_exit ) ;
2020-03-26 01:18:23 +00:00
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant_exit ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_regrant_sub ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant_sub ) ;
DEFINE_LOGGRANT_EVENT ( xfs_log_ticket_ungrant_exit ) ;
2020-03-25 03:10:27 +00:00
DEFINE_LOGGRANT_EVENT ( xfs_log_cil_wait ) ;
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:27 +00:00
/*
 * xfs_log_cil_return: a further event instantiated through
 * DEFINE_LOGGRANT_EVENT (macro and event class are defined outside this
 * excerpt).
 */
DEFINE_LOGGRANT_EVENT ( xfs_log_cil_return ) ;
2009-12-14 23:14:59 +00:00
2011-10-11 15:14:11 +00:00
/*
 * Event class for tracepoints that take a single log item (@lip).
 *
 * Records the device of the mount that owns the item's log
 * (lip->li_log->l_mp->m_super->s_dev), the item pointer itself, and the
 * item's li_type, li_flags and li_lsn.
 *
 * On output the LSN is split into cycle/block with CYCLE_LSN() and
 * BLOCK_LSN(), the type is decoded through XFS_LI_TYPE_DESC and the flags
 * through XFS_LI_FLAGS.
 */
DECLARE_EVENT_CLASS ( xfs_log_item_class ,
TP_PROTO ( struct xfs_log_item * lip ) ,
TP_ARGS ( lip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( void * , lip )
__field ( uint , type )
2018-05-09 14:47:34 +00:00
__field ( unsigned long , flags )
2011-10-11 15:14:11 +00:00
__field ( xfs_lsn_t , lsn )
) ,
TP_fast_assign (
2022-03-17 16:09:12 +00:00
__entry - > dev = lip - > li_log - > l_mp - > m_super - > s_dev ;
2011-10-11 15:14:11 +00:00
__entry - > lip = lip ;
__entry - > type = lip - > li_type ;
__entry - > flags = lip - > li_flags ;
__entry - > lsn = lip - > li_lsn ;
) ,
2018-01-09 19:43:36 +00:00
TP_printk ( " dev %d:%d lip %p lsn %d/%d type %s flags %s " ,
2011-10-11 15:14:11 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > lip ,
CYCLE_LSN ( __entry - > lsn ) , BLOCK_LSN ( __entry - > lsn ) ,
__print_symbolic ( __entry - > type , XFS_LI_TYPE_DESC ) ,
__print_flags ( __entry - > flags , " | " , XFS_LI_FLAGS ) )
)
2012-04-24 06:33:31 +00:00
/*
 * xfs_log_force tracepoint.
 *
 * Records the device of @mp, the @lsn passed by the caller and the caller's
 * address (@caller_ip).  Unlike the log item classes in this file, the LSN
 * is printed here as a single raw hex value rather than as cycle/block, and
 * the caller address is printed with %pS.
 */
TRACE_EVENT ( xfs_log_force ,
2016-04-05 23:46:30 +00:00
TP_PROTO ( struct xfs_mount * mp , xfs_lsn_t lsn , unsigned long caller_ip ) ,
TP_ARGS ( mp , lsn , caller_ip ) ,
2012-04-24 06:33:31 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_lsn_t , lsn )
2016-04-05 23:46:30 +00:00
__field ( unsigned long , caller_ip )
2012-04-24 06:33:31 +00:00
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > lsn = lsn ;
2016-04-05 23:46:30 +00:00
__entry - > caller_ip = caller_ip ;
2012-04-24 06:33:31 +00:00
) ,
2018-01-09 19:46:05 +00:00
TP_printk ( " dev %d:%d lsn 0x%llx caller %pS " ,
2012-04-24 06:33:31 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2016-04-05 23:46:30 +00:00
__entry - > lsn , ( void * ) __entry - > caller_ip )
2012-04-24 06:33:31 +00:00
)
2011-10-11 15:14:11 +00:00
/*
 * Instantiate an event named @name from xfs_log_item_class.  The prototype
 * repeats the class prototype: a single struct xfs_log_item pointer.
 */
# define DEFINE_LOG_ITEM_EVENT(name) \
DEFINE_EVENT ( xfs_log_item_class , name , \
TP_PROTO ( struct xfs_log_item * lip ) , \
TP_ARGS ( lip ) )
/* Events that share the xfs_log_item_class record format. */
DEFINE_LOG_ITEM_EVENT ( xfs_ail_push ) ;
DEFINE_LOG_ITEM_EVENT ( xfs_ail_pinned ) ;
DEFINE_LOG_ITEM_EVENT ( xfs_ail_locked ) ;
xfs: on-stack delayed write buffer lists
Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.
This is now easily doable given that we have very few places left that write
delwri buffers:
- log recovery:
Only done at mount time, and already forcing out the buffers
synchronously using xfs_flush_buftarg
- quotacheck:
Same story.
- dquot reclaim:
Writes out dirty dquots on the LRU under memory pressure. We might
want to look into doing more of this via xfsaild, but it's already
more optimal than the synchronous inode reclaim that writes each
buffer synchronously.
- xfsaild:
This is the main beneficiary of the change. By keeping a local list
of buffers to write we reduce latency of writing out buffers, and
more importantly we can remove all the delwri list promotions which
were hitting the buffer cache hard under sustained metadata loads.
The implementation is very straight forward - xfs_buf_delwri_queue now gets
a new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwri_submit or
xfs_buf_delwri_submit_nowait. Buffers that already are on a delwri list are
skipped in xfs_buf_delwri_queue, assuming they already are on another delwri
list. The biggest change to pass down the buffer list was done to the AIL
pushing. Now that we operate on buffers the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item, and if possible add the buffer that needs writeback to the buffer list.
This leads to much simpler code than the previous split but requires the
individual IOP_PUSH instances to unlock and reacquire the AIL around calls
to blocking routines.
Given that xfsailds now also handle writing out buffers, the conditions for
log forcing and the sleep times needed some small changes. The most
important one is that we consider an AIL busy as long as we still have buffers
to push, and the other one is that we do increment the pushed LSN for
buffers that are under flushing at this moment, but still count them towards
the stuck items for restart purposes. Without this we could hammer on stuck
items without ever forcing the log and not make progress under heavy random
delete workloads on fast flash storage devices.
[ Dave Chinner:
- rebase on previous patches.
- improved comments for XBF_DELWRI_Q handling
- fix XBF_ASYNC handling in queue submission (test 106 failure)
- rename delwri submit function buffer list parameters for clarity
- xfs_efd_item_push() should return XFS_ITEM_PINNED ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2012-04-23 05:58:39 +00:00
/* xfs_ail_flushing: another event using the xfs_log_item_class format. */
DEFINE_LOG_ITEM_EVENT ( xfs_ail_flushing ) ;
xfs: intent item whiteouts
When we log modifications based on intents, we add both intent
and intent done items to the modification being made. These get
written to the log to ensure that the operation is re-run if the
intent done is not found in the log.
However, for operations that complete wholly within a single
checkpoint, the change in the checkpoint is atomic and will never
need replay. In this case, we don't need to actually write the
intent and intent done items to the journal because log recovery
will never need to manually restart this modification.
Log recovery currently handles intent/intent done matching by
inserting the intent into the AIL, then removing it when a matching
intent done item is found. Hence for all the intent-based operations
that complete within a checkpoint, we spend all that time parsing
the intent/intent done items just to cancel them and do nothing with
them.
Hence it follows that the only time we actually need intents in the
log is when the modification crosses checkpoint boundaries in the
log and so may only be partially complete in the journal. Hence if
we commit an intent done item to the CIL and the intent item is in
the same checkpoint, we don't actually have to write them to the
journal because log recovery will always cancel the intents.
We've never really worried about the overhead of logging intents
unnecessarily like this because the intents we log are generally
very much smaller than the change being made. e.g. freeing an extent
involves modifying at least two freespace btree blocks and the AGF,
so the EFI/EFD overhead is only a small increase in space and
processing time compared to the overall cost of freeing an extent.
However, delayed attributes change this cost equation dramatically,
especially for inline attributes. In the case of adding an inline
attribute, we only log the inode core and attribute fork at present.
With delayed attributes, we now log the attr intent which includes
the name and value, the inode core and attr fork, and finally the
attr intent done item. We increase the number of items we log from 1
to 3, and the number of log vectors (regions) goes up from 3 to 7.
Hence we triple the number of objects that the CIL has to process,
and more than double the number of log vectors that need to be
written to the journal.
At scale, this means delayed attributes cause a non-pipelined CIL to
become CPU bound processing all the extra items, resulting in a > 40%
performance degradation on 16-way file+xattr create workloads.
Pipelining the CIL (as per 5.15) reduces the performance degradation
to 20%, but now the limitation is the rate at which the log items
can be written to the iclogs and iclogs be dispatched for IO and
completed.
Even log IO completion is slowed down by these intents, because it
now has to process 3x the number of items in the checkpoint.
Processing completed intents is especially inefficient here, because
we first insert the intent into the AIL, then remove it from the AIL
when the intent done is processed. IOWs, we are also doing expensive
operations in log IO completion we could completely avoid if we
didn't log completed intent/intent done pairs.
Enter log item whiteouts.
When an intent done is committed, we can check to see if the
associated intent is in the same checkpoint as we are currently
committing the intent done to. If so, we can mark the intent log
item with a whiteout and immediately free the intent done item
rather than committing it to the CIL. We can basically skip the
entire formatting and CIL insertion steps for the intent done item.
However, we cannot remove the intent item from the CIL at this point
because the unlocked per-cpu CIL item lists do not permit removal
without holding the CIL context lock exclusively. Transaction commit
only holds the context lock shared, hence the best we can do is mark
the intent item with a whiteout so that the CIL push can release it
rather than writing it to the log.
This means we never write the intent to the log if the intent done
has also been committed to the same checkpoint, but we'll always
write the intent if the intent done has not been committed or has
been committed to a different checkpoint. This will result in
correct log recovery behaviour in all cases, without the overhead of
logging unnecessary intents.
This intent whiteout concept is generic - we can apply it to all
intent/intent done pairs that have a direct 1:1 relationship. The
way deferred ops iterate and relog intents mean that all intents
currently have a 1:1 relationship with their done intent, and hence
we can apply this cancellation to all existing intent/intent done
implementations.
For delayed attributes with a 16-way 64kB xattr create workload,
whiteouts reduce the amount of journalled metadata from ~2.5GB/s
down to ~600MB/s and improve the creation rate from 9000/s to
14000/s.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-04 01:50:29 +00:00
/*
 * CIL whiteout events (mark / skip / unpin).  Each takes a single log item
 * and uses the xfs_log_item_class record format.
 */
DEFINE_LOG_ITEM_EVENT ( xfs_cil_whiteout_mark ) ;
DEFINE_LOG_ITEM_EVENT ( xfs_cil_whiteout_skip ) ;
DEFINE_LOG_ITEM_EVENT ( xfs_cil_whiteout_unpin ) ;
2011-10-11 15:14:11 +00:00
2013-11-01 04:27:18 +00:00
/*
 * Event class for tracepoints that report a log item together with an old
 * and a new LSN supplied by the caller.
 *
 * Records the device of the mount that owns the item's log, the item
 * pointer, li_type, li_flags, and the @old_lsn / @new_lsn arguments (these
 * are taken from the arguments, not read from the item).
 *
 * Both LSNs are printed as cycle/block via CYCLE_LSN() and BLOCK_LSN(); type
 * and flags are decoded with XFS_LI_TYPE_DESC and XFS_LI_FLAGS.
 */
DECLARE_EVENT_CLASS ( xfs_ail_class ,
TP_PROTO ( struct xfs_log_item * lip , xfs_lsn_t old_lsn , xfs_lsn_t new_lsn ) ,
TP_ARGS ( lip , old_lsn , new_lsn ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( void * , lip )
__field ( uint , type )
2018-05-09 14:47:34 +00:00
__field ( unsigned long , flags )
2013-11-01 04:27:18 +00:00
__field ( xfs_lsn_t , old_lsn )
__field ( xfs_lsn_t , new_lsn )
) ,
TP_fast_assign (
2022-03-17 16:09:12 +00:00
__entry - > dev = lip - > li_log - > l_mp - > m_super - > s_dev ;
2013-11-01 04:27:18 +00:00
__entry - > lip = lip ;
__entry - > type = lip - > li_type ;
__entry - > flags = lip - > li_flags ;
__entry - > old_lsn = old_lsn ;
__entry - > new_lsn = new_lsn ;
) ,
2018-01-09 19:43:36 +00:00
TP_printk ( " dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s " ,
2013-11-01 04:27:18 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > lip ,
CYCLE_LSN ( __entry - > old_lsn ) , BLOCK_LSN ( __entry - > old_lsn ) ,
CYCLE_LSN ( __entry - > new_lsn ) , BLOCK_LSN ( __entry - > new_lsn ) ,
__print_symbolic ( __entry - > type , XFS_LI_TYPE_DESC ) ,
__print_flags ( __entry - > flags , " | " , XFS_LI_FLAGS ) )
)
/*
 * Instantiate an event named @name from xfs_ail_class.  The prototype
 * repeats the class prototype: a log item plus the old and new LSN.
 */
# define DEFINE_AIL_EVENT(name) \
DEFINE_EVENT ( xfs_ail_class , name , \
TP_PROTO ( struct xfs_log_item * lip , xfs_lsn_t old_lsn , xfs_lsn_t new_lsn ) , \
TP_ARGS ( lip , old_lsn , new_lsn ) )
/* AIL insert / move / delete events, all using the xfs_ail_class format. */
DEFINE_AIL_EVENT ( xfs_ail_insert ) ;
DEFINE_AIL_EVENT ( xfs_ail_move ) ;
DEFINE_AIL_EVENT ( xfs_ail_delete ) ;
/*
 * xfs_log_assign_tail_lsn tracepoint.
 *
 * Records the device of the log's mount and three LSNs:
 *   new_lsn  - the @new_lsn argument, printed as "new tail lsn"
 *   old_lsn  - log->l_tail_lsn, sampled with atomic64_read() when the event
 *              fires, printed as "old lsn"
 *   head_lsn - log->l_ailp->ail_head_lsn, printed as "head lsn"
 *
 * All three are printed as cycle/block via CYCLE_LSN() and BLOCK_LSN().
 */
TRACE_EVENT ( xfs_log_assign_tail_lsn ,
TP_PROTO ( struct xlog * log , xfs_lsn_t new_lsn ) ,
TP_ARGS ( log , new_lsn ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_lsn_t , new_lsn )
__field ( xfs_lsn_t , old_lsn )
xfs: l_last_sync_lsn is really AIL state
The current implementation of xlog_assign_tail_lsn() assumes that
when the AIL is empty, the log tail matches the LSN of the last
written commit record. This is recorded in xlog_state_set_callback()
as log->l_last_sync_lsn when the iclog state changes to
XLOG_STATE_CALLBACK. This change is then immediately followed by
running the callbacks on the iclog which then insert the log items
into the AIL at the "commit lsn" of that checkpoint.
The AIL tracks log items via the start record LSN of the checkpoint,
not the commit record LSN. This is because we can pipeline multiple
checkpoints, and so the start record of checkpoint N+1 can be
written before the commit record of checkpoint N. i.e:
start N commit N
+-------------+------------+----------------+
start N+1 commit N+1
The tail of the log cannot be moved to the LSN of commit N when all
the items of that checkpoint are written back, because then the
start record for N+1 is no longer in the active portion of the log
and recovery will fail/corrupt the filesystem.
Hence when all the log items in checkpoint N are written back, the
tail of the log most now only move as far forwards as the start LSN
of checkpoint N+1.
Hence we cannot use the maximum start record LSN the AIL sees as a
replacement the pointer to the current head of the on-disk log
records. However, we currently only use the l_last_sync_lsn when the
AIL is empty - when there is no start LSN remaining, the tail of the
log moves to the LSN of the last commit record as this is where
recovery needs to start searching for recoverable records. THe next
checkpoint will have a start record LSN that is higher than
l_last_sync_lsn, and so everything still works correctly when new
checkpoints are written to an otherwise empty log.
l_last_sync_lsn is an atomic variable because it is currently
updated when an iclog with callbacks attached moves to the CALLBACK
state. While we hold the icloglock at this point, we don't hold the
AIL lock. When we assign the log tail, we hold the AIL lock, not the
icloglock because we have to look up the AIL. Hence it is an atomic
variable so it's not bound to a specific lock context.
However, the iclog callbacks are only used for CIL checkpoints. We
don't use callbacks with unmount record writes, so the
l_last_sync_lsn variable only gets updated when we are processing
CIL checkpoint callbacks. And those callbacks run under AIL lock
contexts, not icloglock context. The CIL checkpoint already knows
what the LSN of the iclog the commit record was written to (obtained
when written into the iclog before submission) and so we can update
the l_last_sync_lsn under the AIL lock in this callback. No other
iclog callbacks will run until the currently executing one
completes, and hence we can update the l_last_sync_lsn under the AIL
lock safely.
This means l_last_sync_lsn can move to the AIL as the "ail_head_lsn"
and it can be used to replace the atomic l_last_sync_lsn in the
iclog code. This makes tracking the log tail belong entirely to the
AIL, rather than being smeared across log, iclog and AIL state and
locking.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:23 +00:00
__field ( xfs_lsn_t , head_lsn )
2013-11-01 04:27:18 +00:00
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > new_lsn = new_lsn ;
__entry - > old_lsn = atomic64_read ( & log - > l_tail_lsn ) ;
xfs: l_last_sync_lsn is really AIL state
The current implementation of xlog_assign_tail_lsn() assumes that
when the AIL is empty, the log tail matches the LSN of the last
written commit record. This is recorded in xlog_state_set_callback()
as log->l_last_sync_lsn when the iclog state changes to
XLOG_STATE_CALLBACK. This change is then immediately followed by
running the callbacks on the iclog which then insert the log items
into the AIL at the "commit lsn" of that checkpoint.
The AIL tracks log items via the start record LSN of the checkpoint,
not the commit record LSN. This is because we can pipeline multiple
checkpoints, and so the start record of checkpoint N+1 can be
written before the commit record of checkpoint N. i.e:
start N commit N
+-------------+------------+----------------+
start N+1 commit N+1
The tail of the log cannot be moved to the LSN of commit N when all
the items of that checkpoint are written back, because then the
start record for N+1 is no longer in the active portion of the log
and recovery will fail/corrupt the filesystem.
Hence when all the log items in checkpoint N are written back, the
tail of the log most now only move as far forwards as the start LSN
of checkpoint N+1.
Hence we cannot use the maximum start record LSN the AIL sees as a
replacement the pointer to the current head of the on-disk log
records. However, we currently only use the l_last_sync_lsn when the
AIL is empty - when there is no start LSN remaining, the tail of the
log moves to the LSN of the last commit record as this is where
recovery needs to start searching for recoverable records. THe next
checkpoint will have a start record LSN that is higher than
l_last_sync_lsn, and so everything still works correctly when new
checkpoints are written to an otherwise empty log.
l_last_sync_lsn is an atomic variable because it is currently
updated when an iclog with callbacks attached moves to the CALLBACK
state. While we hold the icloglock at this point, we don't hold the
AIL lock. When we assign the log tail, we hold the AIL lock, not the
icloglock because we have to look up the AIL. Hence it is an atomic
variable so it's not bound to a specific lock context.
However, the iclog callbacks are only used for CIL checkpoints. We
don't use callbacks with unmount record writes, so the
l_last_sync_lsn variable only gets updated when we are processing
CIL checkpoint callbacks. And those callbacks run under AIL lock
contexts, not icloglock context. The CIL checkpoint already knows
what the LSN of the iclog the commit record was written to (obtained
when written into the iclog before submission) and so we can update
the l_last_sync_lsn under the AIL lock in this callback. No other
iclog callbacks will run until the currently executing one
completes, and hence we can update the l_last_sync_lsn under the AIL
lock safely.
This means l_last_sync_lsn can move to the AIL as the "ail_head_lsn"
and it can be used to replace the atomic l_last_sync_lsn in the
iclog code. This makes tracking the log tail belong entirely to the
AIL, rather than being smeared across log, iclog and AIL state and
locking.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:23 +00:00
__entry - > head_lsn = log - > l_ailp - > ail_head_lsn ;
2013-11-01 04:27:18 +00:00
) ,
xfs: l_last_sync_lsn is really AIL state
The current implementation of xlog_assign_tail_lsn() assumes that
when the AIL is empty, the log tail matches the LSN of the last
written commit record. This is recorded in xlog_state_set_callback()
as log->l_last_sync_lsn when the iclog state changes to
XLOG_STATE_CALLBACK. This change is then immediately followed by
running the callbacks on the iclog which then insert the log items
into the AIL at the "commit lsn" of that checkpoint.
The AIL tracks log items via the start record LSN of the checkpoint,
not the commit record LSN. This is because we can pipeline multiple
checkpoints, and so the start record of checkpoint N+1 can be
written before the commit record of checkpoint N. i.e:
start N commit N
+-------------+------------+----------------+
start N+1 commit N+1
The tail of the log cannot be moved to the LSN of commit N when all
the items of that checkpoint are written back, because then the
start record for N+1 is no longer in the active portion of the log
and recovery will fail/corrupt the filesystem.
Hence when all the log items in checkpoint N are written back, the
tail of the log most now only move as far forwards as the start LSN
of checkpoint N+1.
Hence we cannot use the maximum start record LSN the AIL sees as a
replacement the pointer to the current head of the on-disk log
records. However, we currently only use the l_last_sync_lsn when the
AIL is empty - when there is no start LSN remaining, the tail of the
log moves to the LSN of the last commit record as this is where
recovery needs to start searching for recoverable records. THe next
checkpoint will have a start record LSN that is higher than
l_last_sync_lsn, and so everything still works correctly when new
checkpoints are written to an otherwise empty log.
l_last_sync_lsn is an atomic variable because it is currently
updated when an iclog with callbacks attached moves to the CALLBACK
state. While we hold the icloglock at this point, we don't hold the
AIL lock. When we assign the log tail, we hold the AIL lock, not the
icloglock because we have to look up the AIL. Hence it is an atomic
variable so it's not bound to a specific lock context.
However, the iclog callbacks are only used for CIL checkpoints. We
don't use callbacks with unmount record writes, so the
l_last_sync_lsn variable only gets updated when we are processing
CIL checkpoint callbacks. And those callbacks run under AIL lock
contexts, not icloglock context. The CIL checkpoint already knows
what the LSN of the iclog the commit record was written to (obtained
when written into the iclog before submission) and so we can update
the l_last_sync_lsn under the AIL lock in this callback. No other
iclog callbacks will run until the currently executing one
completes, and hence we can update the l_last_sync_lsn under the AIL
lock safely.
This means l_last_sync_lsn can move to the AIL as the "ail_head_lsn"
and it can be used to replace the atomic l_last_sync_lsn in the
iclog code. This makes tracking the log tail belong entirely to the
AIL, rather than being smeared across log, iclog and AIL state and
locking.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:23 +00:00
TP_printk ( " dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d " ,
2013-11-01 04:27:18 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
CYCLE_LSN ( __entry - > new_lsn ) , BLOCK_LSN ( __entry - > new_lsn ) ,
CYCLE_LSN ( __entry - > old_lsn ) , BLOCK_LSN ( __entry - > old_lsn ) ,
xfs: l_last_sync_lsn is really AIL state
The current implementation of xlog_assign_tail_lsn() assumes that
when the AIL is empty, the log tail matches the LSN of the last
written commit record. This is recorded in xlog_state_set_callback()
as log->l_last_sync_lsn when the iclog state changes to
XLOG_STATE_CALLBACK. This change is then immediately followed by
running the callbacks on the iclog which then insert the log items
into the AIL at the "commit lsn" of that checkpoint.
The AIL tracks log items via the start record LSN of the checkpoint,
not the commit record LSN. This is because we can pipeline multiple
checkpoints, and so the start record of checkpoint N+1 can be
written before the commit record of checkpoint N. i.e:
start N commit N
+-------------+------------+----------------+
start N+1 commit N+1
The tail of the log cannot be moved to the LSN of commit N when all
the items of that checkpoint are written back, because then the
start record for N+1 is no longer in the active portion of the log
and recovery will fail/corrupt the filesystem.
Hence when all the log items in checkpoint N are written back, the
tail of the log most now only move as far forwards as the start LSN
of checkpoint N+1.
Hence we cannot use the maximum start record LSN the AIL sees as a
replacement the pointer to the current head of the on-disk log
records. However, we currently only use the l_last_sync_lsn when the
AIL is empty - when there is no start LSN remaining, the tail of the
log moves to the LSN of the last commit record as this is where
recovery needs to start searching for recoverable records. THe next
checkpoint will have a start record LSN that is higher than
l_last_sync_lsn, and so everything still works correctly when new
checkpoints are written to an otherwise empty log.
l_last_sync_lsn is an atomic variable because it is currently
updated when an iclog with callbacks attached moves to the CALLBACK
state. While we hold the icloglock at this point, we don't hold the
AIL lock. When we assign the log tail, we hold the AIL lock, not the
icloglock because we have to look up the AIL. Hence it is an atomic
variable so it's not bound to a specific lock context.
However, the iclog callbacks are only used for CIL checkpoints. We
don't use callbacks with unmount record writes, so the
l_last_sync_lsn variable only gets updated when we are processing
CIL checkpoint callbacks. And those callbacks run under AIL lock
contexts, not icloglock context. The CIL checkpoint already knows
what the LSN of the iclog the commit record was written to (obtained
when written into the iclog before submission) and so we can update
the l_last_sync_lsn under the AIL lock in this callback. No other
iclog callbacks will run until the currently executing one
completes, and hence we can update the l_last_sync_lsn under the AIL
lock safely.
This means l_last_sync_lsn can move to the AIL as the "ail_head_lsn"
and it can be used to replace the atomic l_last_sync_lsn in the
iclog code. This makes tracking the log tail belong entirely to the
AIL, rather than being smeared across log, iclog and AIL state and
locking.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 07:21:23 +00:00
CYCLE_LSN ( __entry - > head_lsn ) , BLOCK_LSN ( __entry - > head_lsn ) )
2013-11-01 04:27:18 +00:00
)
2011-10-11 15:14:11 +00:00
2010-05-24 08:25:57 +00:00
/*
 * Event class for file I/O tracepoints that take a kiocb and an iov_iter.
 *
 * The inode is looked up from iocb->ki_filp via file_inode().  Records the
 * superblock device, the XFS inode number, the XFS inode's i_disk_size
 * (printed as "disize"), the I/O position iocb->ki_pos (printed as "pos")
 * and iov_iter_count(iter) (printed as "bytecount").  All values are
 * printed in hexadecimal.
 */
DECLARE_EVENT_CLASS ( xfs_file_class ,
2021-01-23 18:06:29 +00:00
TP_PROTO ( struct kiocb * iocb , struct iov_iter * iter ) ,
TP_ARGS ( iocb , iter ) ,
2010-05-24 08:25:57 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( loff_t , offset )
__field ( size_t , count )
) ,
TP_fast_assign (
2021-01-23 18:06:29 +00:00
__entry - > dev = file_inode ( iocb - > ki_filp ) - > i_sb - > s_dev ;
__entry - > ino = XFS_I ( file_inode ( iocb - > ki_filp ) ) - > i_ino ;
2021-03-29 18:11:40 +00:00
__entry - > size = XFS_I ( file_inode ( iocb - > ki_filp ) ) - > i_disk_size ;
2021-01-23 18:06:29 +00:00
__entry - > offset = iocb - > ki_pos ;
__entry - > count = iov_iter_count ( iter ) ;
2010-05-24 08:25:57 +00:00
) ,
2021-08-17 20:03:19 +00:00
TP_printk ( " dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx " ,
2010-05-24 08:25:57 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > offset ,
2016-07-20 01:31:42 +00:00
__entry - > count )
2009-12-14 23:14:59 +00:00
)
2010-05-24 08:25:57 +00:00
/*
 * Instantiate an event named @name from xfs_file_class.  The prototype
 * repeats the class prototype: a kiocb and an iov_iter.
 */
# define DEFINE_RW_EVENT(name) \
DEFINE_EVENT ( xfs_file_class , name , \
2021-01-23 18:06:29 +00:00
TP_PROTO ( struct kiocb * iocb , struct iov_iter * iter ) , \
TP_ARGS ( iocb , iter ) )
2016-07-20 01:31:42 +00:00
/* File read/write events, all using the xfs_file_class format. */
DEFINE_RW_EVENT ( xfs_file_buffered_read ) ;
DEFINE_RW_EVENT ( xfs_file_direct_read ) ;
2016-07-20 01:38:55 +00:00
DEFINE_RW_EVENT ( xfs_file_dax_read ) ;
2009-12-14 23:14:59 +00:00
DEFINE_RW_EVENT ( xfs_file_buffered_write ) ;
DEFINE_RW_EVENT ( xfs_file_direct_write ) ;
2016-07-20 01:38:55 +00:00
DEFINE_RW_EVENT ( xfs_file_dax_write ) ;
2021-01-23 18:06:30 +00:00
DEFINE_RW_EVENT ( xfs_reflink_bounce_dio_write ) ;
2010-12-10 08:42:20 +00:00
/*
 * Event class for tracepoints that report a file range together with an
 * extent mapping.
 *
 * Records the device, inode number, ondisk size (ip->i_disk_size, printed as
 * "disize"), the @offset / @count range (printed as "pos" / "bytecount"),
 * @whichfork, and the extent described by @irec.
 *
 * @irec may be NULL: startoff, startblock and blockcount are then all
 * recorded as 0.  The fork is decoded with XFS_WHICHFORK_STRINGS and
 * startblock is cast to int64_t for printing.
 *
 * NOTE(review): @count is an ssize_t but is stored in a size_t field, so a
 * negative count would show up as a very large hex bytecount.
 */
DECLARE_EVENT_CLASS ( xfs_imap_class ,
2010-05-24 08:25:57 +00:00
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ,
2019-02-15 16:02:46 +00:00
int whichfork , struct xfs_bmbt_irec * irec ) ,
TP_ARGS ( ip , offset , count , whichfork , irec ) ,
2010-05-24 08:25:57 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( loff_t , size )
__field ( loff_t , offset )
__field ( size_t , count )
2019-02-15 16:02:46 +00:00
__field ( int , whichfork )
2010-05-24 08:25:57 +00:00
__field ( xfs_fileoff_t , startoff )
__field ( xfs_fsblock_t , startblock )
__field ( xfs_filblks_t , blockcount )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 18:11:40 +00:00
__entry - > size = ip - > i_disk_size ;
2010-05-24 08:25:57 +00:00
__entry - > offset = offset ;
__entry - > count = count ;
2019-02-15 16:02:46 +00:00
__entry - > whichfork = whichfork ;
2010-05-24 08:25:57 +00:00
__entry - > startoff = irec ? irec - > br_startoff : 0 ;
__entry - > startblock = irec ? irec - > br_startblock : 0 ;
__entry - > blockcount = irec ? irec - > br_blockcount : 0 ;
) ,
2021-08-17 20:03:19 +00:00
TP_printk ( " dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx "
2021-08-17 19:45:59 +00:00
" fork %s startoff 0x%llx startblock 0x%llx fsbcount 0x%llx " ,
2010-05-24 08:25:57 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > offset ,
__entry - > count ,
2021-08-17 20:09:26 +00:00
__print_symbolic ( __entry - > whichfork , XFS_WHICHFORK_STRINGS ) ,
2010-05-24 08:25:57 +00:00
__entry - > startoff ,
2017-06-16 18:00:05 +00:00
( int64_t ) __entry - > startblock ,
2010-05-24 08:25:57 +00:00
__entry - > blockcount )
2009-12-14 23:14:59 +00:00
)
2010-05-24 08:25:57 +00:00
2019-02-15 16:02:46 +00:00
/*
 * Instantiate an event named @name from xfs_imap_class.  The prototype
 * repeats the class prototype: inode, offset, count, fork and extent record.
 */
# define DEFINE_IMAP_EVENT(name) \
2010-12-10 08:42:20 +00:00
DEFINE_EVENT ( xfs_imap_class , name , \
2010-05-24 08:25:57 +00:00
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count , \
2019-02-15 16:02:46 +00:00
int whichfork , struct xfs_bmbt_irec * irec ) , \
TP_ARGS ( ip , offset , count , whichfork , irec ) )
/* Block mapping events, all using the xfs_imap_class format. */
DEFINE_IMAP_EVENT ( xfs_map_blocks_found ) ;
DEFINE_IMAP_EVENT ( xfs_map_blocks_alloc ) ;
DEFINE_IMAP_EVENT ( xfs_iomap_alloc ) ;
DEFINE_IMAP_EVENT ( xfs_iomap_found ) ;
2009-12-14 23:14:59 +00:00
2010-05-24 08:25:57 +00:00
/*
 * Event class for simple per-inode I/O tracepoints.
 *
 * Each record stores the device, the inode number, the VFS inode size
 * (isize), the XFS on-disk size (disize), and the offset and count passed
 * by the caller.  The offset is printed as "pos" and the count as
 * "bytecount".
 */
DECLARE_EVENT_CLASS ( xfs_simple_io_class ,
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ) ,
TP_ARGS ( ip , offset , count ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2011-07-18 03:40:19 +00:00
__field ( loff_t , isize )
__field ( loff_t , disize )
2010-05-24 08:25:57 +00:00
__field ( loff_t , offset )
/* count arrives as ssize_t but is stored and printed as size_t */
__field ( size_t , count )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2011-12-18 20:00:11 +00:00
/* isize comes from the VFS inode, disize from the XFS inode */
__entry - > isize = VFS_I ( ip ) - > i_size ;
2021-03-29 18:11:40 +00:00
__entry - > disize = ip - > i_disk_size ;
2010-05-24 08:25:57 +00:00
__entry - > offset = offset ;
__entry - > count = count ;
) ,
2011-12-18 20:00:12 +00:00
TP_printk ( " dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
2021-08-17 20:00:13 +00:00
" pos 0x%llx bytecount 0x%zx " ,
2010-05-24 08:25:57 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
2011-07-18 03:40:19 +00:00
__entry - > isize ,
__entry - > disize ,
2010-05-24 08:25:57 +00:00
__entry - > offset ,
__entry - > count )
2009-12-14 23:14:59 +00:00
) ;
2010-05-24 08:25:57 +00:00
/*
 * Instantiate one tracepoint of xfs_simple_io_class; every instance takes
 * the inode, an offset and a count.
 */
# define DEFINE_SIMPLE_IO_EVENT(name) \
DEFINE_EVENT ( xfs_simple_io_class , name , \
TP_PROTO ( struct xfs_inode * ip , xfs_off_t offset , ssize_t count ) , \
TP_ARGS ( ip , offset , count ) )
2009-12-14 23:14:59 +00:00
/* Tracepoints built on xfs_simple_io_class. */
DEFINE_SIMPLE_IO_EVENT ( xfs_delalloc_enospc ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_unwritten_convert ) ;
2011-07-18 03:40:19 +00:00
DEFINE_SIMPLE_IO_EVENT ( xfs_setfilesize ) ;
2015-10-12 05:02:08 +00:00
DEFINE_SIMPLE_IO_EVENT ( xfs_zero_eof ) ;
2016-02-08 03:40:51 +00:00
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write_unwritten ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_end_io_direct_write_append ) ;
2023-05-22 13:50:11 +00:00
DEFINE_SIMPLE_IO_EVENT ( xfs_file_splice_read ) ;
2009-12-14 23:14:59 +00:00
2009-12-21 14:03:03 +00:00
/*
 * Event class for inode truncation tracepoints.
 *
 * Records the device, the inode number, the inode's current on-disk size
 * and the new size supplied by the caller.
 */
DECLARE_EVENT_CLASS ( xfs_itrunc_class ,
TP_PROTO ( struct xfs_inode * ip , xfs_fsize_t new_size ) ,
TP_ARGS ( ip , new_size ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( xfs_fsize_t , new_size )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 18:11:40 +00:00
/* "size" holds the on-disk size; it is printed as "disize" */
__entry - > size = ip - > i_disk_size ;
2009-12-21 14:03:03 +00:00
__entry - > new_size = new_size ;
) ,
2021-08-17 20:03:19 +00:00
TP_printk ( " dev %d:%d ino 0x%llx disize 0x%llx new_size 0x%llx " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > new_size )
)
2009-12-14 23:14:59 +00:00
/*
 * Instantiate one tracepoint of xfs_itrunc_class; every instance takes the
 * inode and the new size.
 */
# define DEFINE_ITRUNC_EVENT(name) \
2009-12-21 14:03:03 +00:00
DEFINE_EVENT ( xfs_itrunc_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_inode * ip , xfs_fsize_t new_size ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( ip , new_size ) )
2011-12-18 20:00:04 +00:00
/* Tracepoints built on xfs_itrunc_class. */
DEFINE_ITRUNC_EVENT ( xfs_itruncate_extents_start ) ;
DEFINE_ITRUNC_EVENT ( xfs_itruncate_extents_end ) ;
2009-12-14 23:14:59 +00:00
/*
 * Tracepoint recording a [start, finish] range on an inode together with
 * the inode's on-disk size.
 * NOTE(review): judging by the event name this is emitted around page cache
 * invalidation; whether "finish" is inclusive is not visible here - confirm
 * against the caller.
 */
TRACE_EVENT ( xfs_pagecache_inval ,
TP_PROTO ( struct xfs_inode * ip , xfs_off_t start , xfs_off_t finish ) ,
TP_ARGS ( ip , start , finish ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
__field ( xfs_off_t , start )
__field ( xfs_off_t , finish )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 18:11:40 +00:00
/* "size" holds the on-disk size; it is printed as "disize" */
__entry - > size = ip - > i_disk_size ;
2009-12-14 23:14:59 +00:00
__entry - > start = start ;
__entry - > finish = finish ;
) ,
2021-08-17 20:03:19 +00:00
TP_printk ( " dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > start ,
__entry - > finish )
) ;
/*
 * Tracepoint for xfs_bunmap.
 *
 * Records the device, inode number, on-disk inode size, the file offset
 * and length of the range (printed as "fileoff" and "fsbcount"), the flags
 * decoded through XFS_BMAPI_FLAGS, and the caller address printed as a
 * symbol via %pS.
 */
TRACE_EVENT ( xfs_bunmap ,
2021-08-17 17:03:45 +00:00
TP_PROTO ( struct xfs_inode * ip , xfs_fileoff_t fileoff , xfs_filblks_t len ,
2009-12-14 23:14:59 +00:00
int flags , unsigned long caller_ip ) ,
2021-08-17 17:03:45 +00:00
TP_ARGS ( ip , fileoff , len , flags , caller_ip ) ,
2009-12-14 23:14:59 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fsize_t , size )
2021-08-17 17:03:45 +00:00
__field ( xfs_fileoff_t , fileoff )
2009-12-14 23:14:59 +00:00
__field ( xfs_filblks_t , len )
__field ( unsigned long , caller_ip )
__field ( int , flags )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
2021-03-29 18:11:40 +00:00
/* "size" holds the on-disk size; it is printed as "disize" */
__entry - > size = ip - > i_disk_size ;
2021-08-17 17:03:45 +00:00
__entry - > fileoff = fileoff ;
2009-12-14 23:14:59 +00:00
__entry - > len = len ;
__entry - > caller_ip = caller_ip ;
__entry - > flags = flags ;
) ,
2022-09-18 20:51:14 +00:00
TP_printk ( " dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx "
2018-01-09 19:46:05 +00:00
" flags %s caller %pS " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
2021-08-17 17:03:45 +00:00
__entry - > fileoff ,
2009-12-14 23:14:59 +00:00
__entry - > len ,
__print_flags ( __entry - > flags , " | " , XFS_BMAPI_FLAGS ) ,
( void * ) __entry - > caller_ip )
) ;
2012-04-29 10:41:10 +00:00
/*
 * Event class for busy-extent tracepoints on an xfs_group.
 *
 * Records the device, the group type and group number taken from the
 * xfs_group, and the block number and length supplied by the caller.
 */
DECLARE_EVENT_CLASS ( xfs_extent_busy_class ,
2024-11-04 04:18:43 +00:00
TP_PROTO ( const struct xfs_group * xg , xfs_agblock_t agbno ,
2024-11-04 04:18:36 +00:00
xfs_extlen_t len ) ,
2024-11-04 04:18:43 +00:00
TP_ARGS ( xg , agbno , len ) ,
2009-12-14 23:14:59 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:18:43 +00:00
__field ( enum xfs_group_type , type )
2009-12-14 23:14:59 +00:00
__field ( xfs_agnumber_t , agno )
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots that are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incures a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 02:07:08 +00:00
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
2009-12-14 23:14:59 +00:00
) ,
TP_fast_assign (
2024-11-04 04:18:43 +00:00
__entry - > dev = xg - > xg_mount - > m_super - > s_dev ;
__entry - > type = xg - > xg_type ;
__entry - > agno = xg - > xg_gno ;
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots that are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incures a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 02:07:08 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
2009-12-14 23:14:59 +00:00
) ,
2024-11-04 04:18:43 +00:00
/*
 * The symbolic group type is printed twice: once as the prefix of "no"
 * and once as the prefix of "bno".
 */
TP_printk ( " dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-11-04 04:18:43 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2009-12-14 23:14:59 +00:00
__entry - > agno ,
2024-11-04 04:18:43 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots that are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incures a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 02:07:08 +00:00
__entry - > agbno ,
__entry - > len )
2009-12-14 23:14:59 +00:00
) ;
2011-04-24 19:06:16 +00:00
/*
 * Instantiate one tracepoint of xfs_extent_busy_class; every instance takes
 * the group, a block number within it and a length.
 */
# define DEFINE_BUSY_EVENT(name) \
2012-04-29 10:41:10 +00:00
DEFINE_EVENT ( xfs_extent_busy_class , name , \
2024-11-04 04:18:43 +00:00
TP_PROTO ( const struct xfs_group * xg , xfs_agblock_t agbno , \
xfs_extlen_t len ) , \
TP_ARGS ( xg , agbno , len ) )
2012-04-29 10:41:10 +00:00
/* Tracepoints built on xfs_extent_busy_class. */
DEFINE_BUSY_EVENT ( xfs_extent_busy ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_force ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_reuse ) ;
DEFINE_BUSY_EVENT ( xfs_extent_busy_clear ) ;
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots than are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incurs a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurrences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather than issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), the buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting for
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 02:07:08 +00:00
2012-04-29 10:41:10 +00:00
/*
 * Tracepoint carrying two extents within one xfs_group: (agbno, len) and
 * (tbno, tlen).  The second pair is printed under the "found_" labels.
 *
 * NOTE(review): the first block number is labelled with the symbolic group
 * type ("%sbno") while the second label is hard-coded as "found_agbno";
 * confirm whether that asymmetry is intended for non-AG group types.
 */
TRACE_EVENT ( xfs_extent_busy_trim ,
2024-11-04 04:18:43 +00:00
TP_PROTO ( const struct xfs_group * xg , xfs_agblock_t agbno ,
2024-11-04 04:18:36 +00:00
xfs_extlen_t len , xfs_agblock_t tbno , xfs_extlen_t tlen ) ,
2024-11-04 04:18:43 +00:00
TP_ARGS ( xg , agbno , len , tbno , tlen ) ,
2011-04-24 19:06:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:18:43 +00:00
__field ( enum xfs_group_type , type )
2011-04-24 19:06:15 +00:00
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( xfs_agblock_t , tbno )
__field ( xfs_extlen_t , tlen )
) ,
TP_fast_assign (
2024-11-04 04:18:43 +00:00
__entry - > dev = xg - > xg_mount - > m_super - > s_dev ;
__entry - > type = xg - > xg_type ;
__entry - > agno = xg - > xg_gno ;
2011-04-24 19:06:15 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > tbno = tbno ;
__entry - > tlen = tlen ;
) ,
2024-11-04 04:18:43 +00:00
TP_printk ( " dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x " ,
2011-04-24 19:06:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-11-04 04:18:43 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2011-04-24 19:06:15 +00:00
__entry - > agno ,
2024-11-04 04:18:43 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2011-04-24 19:06:15 +00:00
__entry - > agbno ,
__entry - > len ,
__entry - > tbno ,
__entry - > tlen )
) ;
2024-11-04 04:19:36 +00:00
# ifdef CONFIG_XFS_RT
/*
 * Tracepoint for a realtime allocation that ran into a busy extent.
 *
 * Records the device, the rt group number, the starting rt extent, the
 * minlen/maxlen/prod allocation parameters, the length and rt extent
 * supplied by the caller, and the busy generation.
 *
 * Only values present in the prototype are stored in the record, so every
 * field of the entry is initialised before it reaches the ring buffer.
 */
TRACE_EVENT(xfs_rtalloc_extent_busy,
	TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t start,
		 xfs_rtxlen_t minlen, xfs_rtxlen_t maxlen,
		 xfs_rtxlen_t len, xfs_rtxlen_t prod, xfs_rtxnum_t rtx,
		 unsigned busy_gen),
	TP_ARGS(rtg, start, minlen, maxlen, len, prod, rtx, busy_gen),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_rgnumber_t, rgno)
		__field(xfs_rtxnum_t, start)
		__field(xfs_rtxlen_t, minlen)
		__field(xfs_rtxlen_t, maxlen)
		__field(xfs_rtxlen_t, prod)
		__field(xfs_rtxlen_t, len)
		__field(xfs_rtxnum_t, rtx)
		__field(unsigned, busy_gen)
	),
	TP_fast_assign(
		__entry->dev = rtg_mount(rtg)->m_super->s_dev;
		__entry->rgno = rtg_rgno(rtg);
		__entry->start = start;
		__entry->minlen = minlen;
		__entry->maxlen = maxlen;
		__entry->prod = prod;
		__entry->len = len;
		__entry->rtx = rtx;
		__entry->busy_gen = busy_gen;
	),
	/* rtx is a hexadecimal value and needs the full "0x" prefix */
	TP_printk(" dev %d:%d rgno 0x%x startrtx 0x%llx minlen %u maxlen %u "
		  " prod %u len %u rtx 0x%llx busy_gen 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->rgno,
		  __entry->start,
		  __entry->minlen,
		  __entry->maxlen,
		  __entry->prod,
		  __entry->len,
		  __entry->rtx,
		  __entry->busy_gen)
);
/*
 * Tracepoint carrying a realtime extent before and after an adjustment:
 * (old_rtx, old_len) is printed first, then "->", then (new_rtx, new_len),
 * all within the rt group identified by rgno.
 */
TRACE_EVENT(xfs_rtalloc_extent_busy_trim,
	TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t old_rtx,
		 xfs_rtxlen_t old_len, xfs_rtxnum_t new_rtx,
		 xfs_rtxlen_t new_len),
	TP_ARGS(rtg, old_rtx, old_len, new_rtx, new_len),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_rgnumber_t, rgno)
		__field(xfs_rtxnum_t, old_rtx)
		__field(xfs_rtxnum_t, new_rtx)
		__field(xfs_rtxlen_t, old_len)
		__field(xfs_rtxlen_t, new_len)
	),
	TP_fast_assign(
		/* where: filesystem device and rt group */
		__entry->dev = rtg_mount(rtg)->m_super->s_dev;
		__entry->rgno = rtg_rgno(rtg);

		/* what: extent starts, then extent lengths */
		__entry->old_rtx = old_rtx;
		__entry->new_rtx = new_rtx;
		__entry->old_len = old_len;
		__entry->new_len = new_len;
	),
	TP_printk(" dev %d:%d rgno 0x%x rtx 0x%llx rtxcount 0x%x -> rtx 0x%llx rtxcount 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->rgno,
		  __entry->old_rtx, __entry->old_len,
		  __entry->new_rtx, __entry->new_len)
);
# endif /* CONFIG_XFS_RT */
2018-03-15 17:51:58 +00:00
/*
 * Event class that snapshots an AGF structure.
 *
 * Every AGF value is converted with be32_to_cpu() before being stored.
 * The record holds the sequence number (printed as agno), length, the bno
 * and cnt roots and levels, the flfirst/fllast/flcount fields, freeblks and
 * longest, plus the caller-supplied flags (decoded through XFS_AGF_FLAGS)
 * and the caller address (printed as a symbol via %pS).
 *
 * NOTE(review): most assignments in TP_fast_assign end in ',' instead of
 * ';', which chains them into comma expressions.  The stores still happen
 * in order, but ';' would match the other tracepoints in this file.
 */
DECLARE_EVENT_CLASS ( xfs_agf_class ,
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_agf * agf , int flags ,
unsigned long caller_ip ) ,
TP_ARGS ( mp , agf , flags , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , flags )
__field ( __u32 , length )
__field ( __u32 , bno_root )
__field ( __u32 , cnt_root )
__field ( __u32 , bno_level )
__field ( __u32 , cnt_level )
__field ( __u32 , flfirst )
__field ( __u32 , fllast )
__field ( __u32 , flcount )
__field ( __u32 , freeblks )
__field ( __u32 , longest )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > agno = be32_to_cpu ( agf - > agf_seqno ) ,
__entry - > flags = flags ;
__entry - > length = be32_to_cpu ( agf - > agf_length ) ,
2024-02-22 20:39:46 +00:00
__entry - > bno_root = be32_to_cpu ( agf - > agf_bno_root ) ,
__entry - > cnt_root = be32_to_cpu ( agf - > agf_cnt_root ) ,
__entry - > bno_level = be32_to_cpu ( agf - > agf_bno_level ) ,
__entry - > cnt_level = be32_to_cpu ( agf - > agf_cnt_level ) ,
2009-12-14 23:14:59 +00:00
__entry - > flfirst = be32_to_cpu ( agf - > agf_flfirst ) ,
__entry - > fllast = be32_to_cpu ( agf - > agf_fllast ) ,
__entry - > flcount = be32_to_cpu ( agf - > agf_flcount ) ,
__entry - > freeblks = be32_to_cpu ( agf - > agf_freeblks ) ,
__entry - > longest = be32_to_cpu ( agf - > agf_longest ) ;
__entry - > caller_ip = caller_ip ;
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x flags %s length %u roots b %u c %u "
2009-12-14 23:14:59 +00:00
" levels b %u c %u flfirst %u fllast %u flcount %u "
2018-01-09 19:46:05 +00:00
" freeblks %u longest %u caller %pS " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__print_flags ( __entry - > flags , " | " , XFS_AGF_FLAGS ) ,
__entry - > length ,
__entry - > bno_root ,
__entry - > cnt_root ,
__entry - > bno_level ,
__entry - > cnt_level ,
__entry - > flfirst ,
__entry - > fllast ,
__entry - > flcount ,
__entry - > freeblks ,
__entry - > longest ,
( void * ) __entry - > caller_ip )
) ;
2018-03-15 17:51:58 +00:00
/*
 * Instantiate one tracepoint of xfs_agf_class.  Every instance takes the
 * mount, the AGF structure, a flags value and a caller address.
 */
#define DEFINE_AGF_EVENT(evname)					\
DEFINE_EVENT(xfs_agf_class, evname,					\
	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,	\
		 unsigned long caller_ip),				\
	TP_ARGS(mp, agf, flags, caller_ip))

/* Tracepoints built on xfs_agf_class. */
DEFINE_AGF_EVENT(xfs_agf);
DEFINE_AGF_EVENT(xfs_agfl_reset);
2009-12-14 23:14:59 +00:00
TRACE_EVENT ( xfs_free_extent ,
2024-11-04 04:18:32 +00:00
TP_PROTO ( const struct xfs_perag * pag , xfs_agblock_t agbno ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
xfs_extlen_t len , enum xfs_ag_resv_type resv , int haveleft ,
int haveright ) ,
2024-11-04 04:18:27 +00:00
TP_ARGS ( pag , agbno , len , resv , haveleft , haveright ) ,
2009-12-14 23:14:59 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__field ( int , resv )
2009-12-14 23:14:59 +00:00
__field ( int , haveleft )
__field ( int , haveright )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2009-12-14 23:14:59 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__entry - > resv = resv ;
2009-12-14 23:14:59 +00:00
__entry - > haveleft = haveleft ;
__entry - > haveright = haveright ;
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x resv %d %s " ,
2009-12-14 23:14:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__entry - > resv ,
2009-12-14 23:14:59 +00:00
__entry - > haveleft ?
( __entry - > haveright ? " both " : " left " ) :
( __entry - > haveright ? " right " : " none " ) )
) ;
2009-12-21 14:03:03 +00:00
/*
 * xfs_alloc_class - event class that snapshots a struct xfs_alloc_arg.
 *
 * Takes a single argument, @args, and copies these members into the trace
 * record: agno, agbno, minlen, maxlen, mod, prod, minleft, total, alignment,
 * minalignslop, len, wasdel, wasfromfl, resv and datatype.  In addition it
 * records dev from args->mp->m_super->s_dev and highest_agno from
 * args->tp->t_highest_agno, so both args->mp and args->tp are dereferenced
 * unconditionally when the event fires.
 *
 * wasdel and wasfromfl are stored as char and printed with %d.  agno, agbno,
 * datatype and highest_agno are printed in hexadecimal.
 *
 * NOTE(review): the length-like fields (minlen ... len) are printed with %u,
 * i.e. in decimal, whereas the convention stated at the top of this file asks
 * for hexadecimal when describing space allocations.
 */
DECLARE_EVENT_CLASS ( xfs_alloc_class ,
TP_PROTO ( struct xfs_alloc_arg * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , minlen )
__field ( xfs_extlen_t , maxlen )
__field ( xfs_extlen_t , mod )
__field ( xfs_extlen_t , prod )
__field ( xfs_extlen_t , minleft )
__field ( xfs_extlen_t , total )
__field ( xfs_extlen_t , alignment )
__field ( xfs_extlen_t , minalignslop )
__field ( xfs_extlen_t , len )
__field ( char , wasdel )
__field ( char , wasfromfl )
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__field ( int , resv )
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-25 22:21:28 +00:00
__field ( int , datatype )
2023-02-10 17:11:06 +00:00
__field ( xfs_agnumber_t , highest_agno )
2009-12-21 14:03:03 +00:00
) ,
TP_fast_assign (
__entry - > dev = args - > mp - > m_super - > s_dev ;
__entry - > agno = args - > agno ;
__entry - > agbno = args - > agbno ;
__entry - > minlen = args - > minlen ;
__entry - > maxlen = args - > maxlen ;
__entry - > mod = args - > mod ;
__entry - > prod = args - > prod ;
__entry - > minleft = args - > minleft ;
__entry - > total = args - > total ;
__entry - > alignment = args - > alignment ;
__entry - > minalignslop = args - > minalignslop ;
__entry - > len = args - > len ;
__entry - > wasdel = args - > wasdel ;
__entry - > wasfromfl = args - > wasfromfl ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__entry - > resv = args - > resv ;
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-25 22:21:28 +00:00
__entry - > datatype = args - > datatype ;
2023-02-10 17:11:06 +00:00
__entry - > highest_agno = args - > tp - > t_highest_agno ;
2009-12-21 14:03:03 +00:00
) ,
2021-08-17 16:28:53 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
2009-12-21 14:03:03 +00:00
" prod %u minleft %u total %u alignment %u minalignslop %u "
2023-02-12 22:14:54 +00:00
" len %u wasdel %d wasfromfl %d resv %d "
2023-02-10 17:11:06 +00:00
" datatype 0x%x highest_agno 0x%x " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > minlen ,
__entry - > maxlen ,
__entry - > mod ,
__entry - > prod ,
__entry - > minleft ,
__entry - > total ,
__entry - > alignment ,
__entry - > minalignslop ,
__entry - > len ,
__entry - > wasdel ,
__entry - > wasfromfl ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__entry - > resv ,
xfs: remote attribute blocks aren't really userdata
When adding a new remote attribute, we write the attribute to the
new extent before the allocation transaction is committed. This
means we cannot reuse busy extents as that violates crash
consistency semantics. Hence we currently treat remote attribute
extent allocation like userdata because it has the same overwrite
ordering constraints as userdata.
Unfortunately, this also allows the allocator to incorrectly apply
extent size hints to the remote attribute extent allocation. This
results in interesting failures, such as transaction block
reservation overruns and in-memory inode attribute fork corruption.
To fix this, we need to separate the busy extent reuse configuration
from the userdata configuration. This changes the definition of
XFS_BMAPI_METADATA slightly - it now means that allocation is
metadata and reuse of busy extents is acceptible due to the metadata
ordering semantics of the journal. If this flag is not set, it
means the allocation is that has unordered data writeback, and hence
busy extent reuse is not allowed. It no longer implies the
allocation is for user data, just that the data write will not be
strictly ordered. This matches the semantics for both user data
and remote attribute block allocation.
As such, This patch changes the "userdata" field to a "datatype"
field, and adds a "no busy reuse" flag to the field.
When we detect an unordered data extent allocation, we immediately set
the no reuse flag. We then set the "user data" flags based on the
inode fork we are allocating the extent to. Hence we only set
userdata flags on data fork allocations now and consider attribute
fork remote extents to be an unordered metadata extent.
The result is that remote attribute extents now have the expected
allocation semantics, and the data fork allocation behaviour is
completely unchanged.
It should be noted that there may be other ways to fix this (e.g.
use ordered metadata buffers for the remote attribute extent data
write) but they are more invasive and difficult to validate both
from a design and implementation POV. Hence this patch takes the
simple, obvious route to fixing the problem...
Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-25 22:21:28 +00:00
__entry - > datatype ,
2023-02-10 17:11:06 +00:00
__entry - > highest_agno )
2009-12-14 23:14:59 +00:00
)
2009-12-21 14:03:03 +00:00
/*
 * DEFINE_ALLOC_EVENT(name) - declare tracepoint @name as an instance of
 * xfs_alloc_class.  Every instance takes a single struct xfs_alloc_arg *
 * and shares the record layout and output format of that class.
 *
 * The list below instantiates the allocator tracepoints.
 */
# define DEFINE_ALLOC_EVENT(name) \
DEFINE_EVENT ( xfs_alloc_class , name , \
TP_PROTO ( struct xfs_alloc_arg * args ) , \
TP_ARGS ( args ) )
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_done ) ;
2010-12-10 15:03:57 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_notfound ) ;
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_exact_error ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_nominleft ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_first ) ;
2019-10-14 00:10:35 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur ) ;
2019-10-14 00:10:33 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_right ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_left ) ;
2019-10-14 00:10:36 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_lookup ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_cur_lookup_done ) ;
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_near_error ) ;
2011-04-24 19:06:15 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_near_noentry ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_near_busy ) ;
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_size_neither ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_noentry ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_nominleft ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_done ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_size_error ) ;
2011-04-24 19:06:15 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_size_busy ) ;
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_small_freelist ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_notenough ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_done ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_small_error ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_badargs ) ;
2023-02-10 17:07:06 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_skip_deadlock ) ;
2009-12-14 23:14:59 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_nofix ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_noagbp ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_loopfailed ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_allfailed ) ;
2023-03-16 00:30:33 +00:00
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_this_ag ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_start_ag ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_first_ag ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_exact_bno ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_near_bno ) ;
DEFINE_ALLOC_EVENT ( xfs_alloc_vextent_finish ) ;
2019-10-14 00:10:33 +00:00
/*
 * xfs_alloc_cur_check - trace a candidate extent examined through a btree
 * cursor.
 *
 * Recorded fields:
 *   dev  - s_dev of cur->bc_mp->m_super
 *   name - string copied from cur->bc_ops->name; the output appends "bt"
 *          directly after it
 *   bno  - per-AG block number (printed under the "agbno" label)
 *   len  - extent length (printed under the "fsbcount" label)
 *   diff - caller supplied xfs_extlen_t, printed in hexadecimal
 *   new  - caller supplied bool, printed with %d
 *
 * NOTE(review): diff and new presumably describe how this candidate compares
 * with the best one found so far; confirm against the caller.
 */
TRACE_EVENT ( xfs_alloc_cur_check ,
2024-02-22 20:39:47 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , xfs_agblock_t bno ,
2019-10-14 00:10:33 +00:00
xfs_extlen_t len , xfs_extlen_t diff , bool new ) ,
2024-02-22 20:39:47 +00:00
TP_ARGS ( cur , bno , len , diff , new ) ,
2019-10-14 00:10:33 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-02-22 20:39:47 +00:00
__string ( name , cur - > bc_ops - > name )
2019-10-14 00:10:33 +00:00
__field ( xfs_agblock_t , bno )
__field ( xfs_extlen_t , len )
__field ( xfs_extlen_t , diff )
__field ( bool , new )
) ,
TP_fast_assign (
2024-02-22 20:39:47 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
2019-10-14 00:10:33 +00:00
__entry - > bno = bno ;
__entry - > len = len ;
__entry - > diff = diff ;
__entry - > new = new ;
) ,
2024-02-22 20:39:47 +00:00
TP_printk ( " dev %d:%d %sbt agbno 0x%x fsbcount 0x%x diff 0x%x new %d " ,
2019-10-14 00:10:33 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:39:47 +00:00
__get_str ( name ) ,
2019-10-14 00:10:33 +00:00
__entry - > bno , __entry - > len , __entry - > diff , __entry - > new )
)
2012-03-22 05:15:13 +00:00
/*
 * xfs_da_class - event class that snapshots a struct xfs_da_args.
 *
 * Recorded fields:
 *   dev      - s_dev of the superblock of VFS_I(args->dp)
 *   ino      - args->dp->i_ino
 *   name     - dynamic array of args->namelen bytes holding args->name
 *   namelen  - args->namelen
 *   hashval  - args->hashval
 *   inumber  - args->inumber
 *   op_flags - args->op_flags, decoded with XFS_DA_OP_FLAGS when printed
 *   owner    - args->owner
 *
 * The name is printed with %.*s, so the argument list passes namelen as the
 * precision, then the name pointer (NULL when namelen is zero), then namelen
 * once more for the "namelen" label.
 */
DECLARE_EVENT_CLASS ( xfs_da_class ,
2009-12-21 14:03:03 +00:00
TP_PROTO ( struct xfs_da_args * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__dynamic_array ( char , name , args - > namelen )
__field ( int , namelen )
__field ( xfs_dahash_t , hashval )
__field ( xfs_ino_t , inumber )
2022-04-21 00:46:47 +00:00
__field ( uint32_t , op_flags )
2024-04-15 21:54:34 +00:00
__field ( xfs_ino_t , owner )
2009-12-21 14:03:03 +00:00
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
/*
 * Exactly namelen bytes are copied and no NUL terminator is written;
 * printing relies on the %.*s precision to bound the read.
 */
if ( args - > namelen )
memcpy ( __get_str ( name ) , args - > name , args - > namelen ) ;
__entry - > namelen = args - > namelen ;
__entry - > hashval = args - > hashval ;
__entry - > inumber = args - > inumber ;
__entry - > op_flags = args - > op_flags ;
2024-04-15 21:54:34 +00:00
__entry - > owner = args - > owner ;
2009-12-21 14:03:03 +00:00
) ,
TP_printk ( " dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
2024-04-15 21:54:34 +00:00
" inumber 0x%llx op_flags %s owner 0x%llx " ,
2009-12-21 14:03:03 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > namelen ,
__entry - > namelen ? __get_str ( name ) : NULL ,
__entry - > namelen ,
__entry - > hashval ,
__entry - > inumber ,
2024-04-15 21:54:34 +00:00
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) ,
__entry - > owner )
2009-12-21 14:03:03 +00:00
)
/*
 * DEFINE_DIR2_EVENT(name) - declare tracepoint @name as an instance of
 * xfs_da_class, taking a single struct xfs_da_args *.
 *
 * The list below instantiates the xfs_dir2_* tracepoints that use it.
 */
# define DEFINE_DIR2_EVENT(name) \
2012-03-22 05:15:13 +00:00
DEFINE_EVENT ( xfs_da_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_da_args * args ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( args ) )
DEFINE_DIR2_EVENT ( xfs_dir2_sf_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_create ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_toino4 ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_toino8 ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_sf_to_block ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_to_sf ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_block_to_leaf ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_to_block ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_leaf_to_node ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_addname ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_lookup ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_replace ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_removename ) ;
DEFINE_DIR2_EVENT ( xfs_dir2_node_to_leaf ) ;
2012-11-12 11:53:53 +00:00
/*
 * xfs_attr_class - event class that snapshots the attribute related members
 * of a struct xfs_da_args.
 *
 * Recorded fields:
 *   dev         - s_dev of the superblock of VFS_I(args->dp)
 *   ino         - args->dp->i_ino
 *   name        - dynamic array of args->namelen bytes holding args->name
 *   namelen     - args->namelen
 *   valuelen    - args->valuelen
 *   hashval     - args->hashval
 *   attr_filter - args->attr_filter, decoded with XFS_ATTR_FILTER_FLAGS
 *   op_flags    - args->op_flags, decoded with XFS_DA_OP_FLAGS
 *
 * The name is printed with %.*s, so the argument list passes namelen as the
 * precision, then the name pointer (NULL when namelen is zero), then namelen
 * once more for the "namelen" label.
 */
DECLARE_EVENT_CLASS ( xfs_attr_class ,
TP_PROTO ( struct xfs_da_args * args ) ,
TP_ARGS ( args ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__dynamic_array ( char , name , args - > namelen )
__field ( int , namelen )
__field ( int , valuelen )
__field ( xfs_dahash_t , hashval )
2020-02-27 01:30:42 +00:00
__field ( unsigned int , attr_filter )
2022-04-21 00:46:47 +00:00
__field ( uint32_t , op_flags )
2012-11-12 11:53:53 +00:00
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
/*
 * Exactly namelen bytes are copied and no NUL terminator is written;
 * printing relies on the %.*s precision to bound the read.
 */
if ( args - > namelen )
memcpy ( __get_str ( name ) , args - > name , args - > namelen ) ;
__entry - > namelen = args - > namelen ;
__entry - > valuelen = args - > valuelen ;
__entry - > hashval = args - > hashval ;
2020-02-27 01:30:42 +00:00
__entry - > attr_filter = args - > attr_filter ;
2012-11-12 11:53:53 +00:00
__entry - > op_flags = args - > op_flags ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d "
2024-04-22 16:47:21 +00:00
" hashval 0x%x filter %s op_flags %s " ,
2012-11-12 11:53:53 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > namelen ,
__entry - > namelen ? __get_str ( name ) : NULL ,
__entry - > namelen ,
__entry - > valuelen ,
__entry - > hashval ,
2020-02-27 01:30:42 +00:00
__print_flags ( __entry - > attr_filter , " | " ,
XFS_ATTR_FILTER_FLAGS ) ,
2012-11-12 11:53:53 +00:00
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) )
)
2012-03-22 05:15:13 +00:00
/*
 * DEFINE_ATTR_EVENT(name) - declare tracepoint @name as an instance of
 * xfs_attr_class, taking a single struct xfs_da_args *.
 *
 * The list below instantiates the xfs_attr_* tracepoints.
 */
# define DEFINE_ATTR_EVENT(name) \
2012-11-12 11:53:53 +00:00
DEFINE_EVENT ( xfs_attr_class , name , \
2012-03-22 05:15:13 +00:00
TP_PROTO ( struct xfs_da_args * args ) , \
TP_ARGS ( args ) )
DEFINE_ATTR_EVENT ( xfs_attr_sf_add ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_addname ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_create ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_lookup ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_remove ) ;
DEFINE_ATTR_EVENT ( xfs_attr_sf_to_leaf ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_old ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_new ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_add_work ) ;
2012-03-22 05:15:13 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_create ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_compact ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_get ) ;
2012-03-22 05:15:13 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_lookup ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_replace ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_remove ) ;
2012-03-22 05:15:13 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_removename ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split_before ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_split_after ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_clearflag ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_setflag ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_flipflags ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_to_sf ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_to_node ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_rebalance ) ;
DEFINE_ATTR_EVENT ( xfs_attr_leaf_unbalance ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_leaf_toosmall ) ;
2012-03-22 05:15:13 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_node_addname ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_node_get ) ;
2012-03-22 05:15:13 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_node_replace ) ;
DEFINE_ATTR_EVENT ( xfs_attr_node_removename ) ;
2012-11-12 11:53:53 +00:00
DEFINE_ATTR_EVENT ( xfs_attr_fillstate ) ;
DEFINE_ATTR_EVENT ( xfs_attr_refillstate ) ;
DEFINE_ATTR_EVENT ( xfs_attr_rmtval_get ) ;
DEFINE_ATTR_EVENT ( xfs_attr_rmtval_set ) ;
2012-03-22 05:15:13 +00:00
/*
 * DEFINE_DA_EVENT(name) - declare tracepoint @name as an instance of
 * xfs_da_class, taking a single struct xfs_da_args *.  It expands to the
 * same DEFINE_EVENT form as DEFINE_DIR2_EVENT; only the macro name differs.
 *
 * The list below instantiates the xfs_da_* tracepoints.
 */
# define DEFINE_DA_EVENT(name) \
DEFINE_EVENT ( xfs_da_class , name , \
TP_PROTO ( struct xfs_da_args * args ) , \
TP_ARGS ( args ) )
DEFINE_DA_EVENT ( xfs_da_split ) ;
DEFINE_DA_EVENT ( xfs_da_join ) ;
DEFINE_DA_EVENT ( xfs_da_link_before ) ;
DEFINE_DA_EVENT ( xfs_da_link_after ) ;
DEFINE_DA_EVENT ( xfs_da_unlink_back ) ;
DEFINE_DA_EVENT ( xfs_da_unlink_forward ) ;
DEFINE_DA_EVENT ( xfs_da_root_split ) ;
DEFINE_DA_EVENT ( xfs_da_root_join ) ;
DEFINE_DA_EVENT ( xfs_da_node_add ) ;
DEFINE_DA_EVENT ( xfs_da_node_create ) ;
DEFINE_DA_EVENT ( xfs_da_node_split ) ;
DEFINE_DA_EVENT ( xfs_da_node_remove ) ;
DEFINE_DA_EVENT ( xfs_da_node_rebalance ) ;
DEFINE_DA_EVENT ( xfs_da_node_unbalance ) ;
2012-11-12 11:53:53 +00:00
DEFINE_DA_EVENT ( xfs_da_node_toosmall ) ;
2012-03-22 05:15:13 +00:00
DEFINE_DA_EVENT ( xfs_da_swap_lastblock ) ;
DEFINE_DA_EVENT ( xfs_da_grow_inode ) ;
DEFINE_DA_EVENT ( xfs_da_shrink_inode ) ;
2012-11-12 11:53:53 +00:00
DEFINE_DA_EVENT ( xfs_da_fixhashpath ) ;
DEFINE_DA_EVENT ( xfs_da_path_shift ) ;
2012-03-22 05:15:13 +00:00
2009-12-21 14:03:03 +00:00
/*
 * xfs_dir2_space_class - event class for directory operations that are
 * described by a struct xfs_da_args plus one integer index.
 *
 * Recorded fields:
 *   dev      - s_dev of the superblock of VFS_I(args->dp)
 *   ino      - args->dp->i_ino
 *   op_flags - args->op_flags, decoded with XFS_DA_OP_FLAGS when printed
 *   idx      - the @idx argument, printed in decimal under the "index" label
 *
 * NOTE(review): the meaning of @idx is decided by each caller and cannot be
 * told from this class alone.
 */
DECLARE_EVENT_CLASS ( xfs_dir2_space_class ,
TP_PROTO ( struct xfs_da_args * args , int idx ) ,
TP_ARGS ( args , idx ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2022-04-21 00:46:47 +00:00
__field ( uint32_t , op_flags )
2009-12-21 14:03:03 +00:00
__field ( int , idx )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
__entry - > op_flags = args - > op_flags ;
__entry - > idx = idx ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx op_flags %s index %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) ,
__entry - > idx )
2009-12-14 23:14:59 +00:00
)
2009-12-21 14:03:03 +00:00
/*
 * DEFINE_DIR2_SPACE_EVENT(name) - declare tracepoint @name as an instance of
 * xfs_dir2_space_class, taking (struct xfs_da_args *args, int idx).
 *
 * Four tracepoints are instantiated below.
 */
# define DEFINE_DIR2_SPACE_EVENT(name) \
DEFINE_EVENT ( xfs_dir2_space_class , name , \
2009-12-14 23:14:59 +00:00
TP_PROTO ( struct xfs_da_args * args , int idx ) , \
2009-12-21 14:03:03 +00:00
TP_ARGS ( args , idx ) )
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_leafn_add ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_leafn_remove ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_grow_inode ) ;
DEFINE_DIR2_SPACE_EVENT ( xfs_dir2_shrink_inode ) ;
2009-12-14 23:14:59 +00:00
/*
 * Standalone event that reports a source index, a destination index and a
 * count alongside the usual struct xfs_da_args identification.
 *
 * Records the device and inode number of args->dp, args->op_flags (decoded
 * with XFS_DA_OP_FLAGS when printed), and the src_idx, dst_idx and count
 * values exactly as passed in by the caller.
 */
TRACE_EVENT ( xfs_dir2_leafn_moveents ,
TP_PROTO ( struct xfs_da_args * args , int src_idx , int dst_idx , int count ) ,
TP_ARGS ( args , src_idx , dst_idx , count ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
2022-04-21 00:46:47 +00:00
__field ( uint32_t , op_flags )
2009-12-14 23:14:59 +00:00
__field ( int , src_idx )
__field ( int , dst_idx )
__field ( int , count )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( args - > dp ) - > i_sb - > s_dev ;
__entry - > ino = args - > dp - > i_ino ;
__entry - > op_flags = args - > op_flags ;
__entry - > src_idx = src_idx ;
__entry - > dst_idx = dst_idx ;
__entry - > count = count ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx op_flags %s "
" src_idx %d dst_idx %d count %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_flags ( __entry - > op_flags , " | " , XFS_DA_OP_FLAGS ) ,
__entry - > src_idx ,
__entry - > dst_idx ,
__entry - > count )
) ;
2010-01-14 01:33:55 +00:00
/*
 * Symbol table for __print_symbolic(): maps the "which" argument of the
 * swap extent events to a label (0 is "target", 1 is "temp").
 */
# define XFS_SWAPEXT_INODES \
{ 0 , " target " } , \
{ 1 , " temp " }
2018-12-18 22:32:30 +00:00
/*
 * Register the XFS_DINODE_FMT_* enum constants with TRACE_DEFINE_ENUM().
 * NOTE(review): presumably these are the values that XFS_INODE_FORMAT_STR
 * (used by xfs_swap_extent_class) decodes, exported so trace tooling can
 * resolve them -- confirm against that macro's definition.
 */
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_DEV ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_LOCAL ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_EXTENTS ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_BTREE ) ;
TRACE_DEFINE_ENUM ( XFS_DINODE_FMT_UUID ) ;
2010-01-14 01:33:55 +00:00
/*
 * Event class describing one inode taking part in an extent swap.
 *
 * @which selects the label printed after the inode number (see
 * XFS_SWAPEXT_INODES).  All fork details are read from the inode's i_df
 * fork: its format (if_format, printed via XFS_INODE_FORMAT_STR), its
 * extent count (if_nextents) and its btree root size (if_broot_bytes).
 * The fork offset is whatever xfs_inode_fork_boff() returns for the inode.
 */
DECLARE_EVENT_CLASS ( xfs_swap_extent_class ,
TP_PROTO ( struct xfs_inode * ip , int which ) ,
TP_ARGS ( ip , which ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , which )
__field ( xfs_ino_t , ino )
__field ( int , format )
2021-02-26 05:54:31 +00:00
__field ( xfs_extnum_t , nex )
2010-01-14 01:33:55 +00:00
__field ( int , broot_size )
__field ( int , fork_off )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > which = which ;
__entry - > ino = ip - > i_ino ;
2020-05-18 17:28:05 +00:00
__entry - > format = ip - > i_df . if_format ;
2020-05-18 17:27:22 +00:00
__entry - > nex = ip - > i_df . if_nextents ;
2010-01-14 01:33:55 +00:00
__entry - > broot_size = ip - > i_df . if_broot_bytes ;
2022-07-09 17:56:07 +00:00
__entry - > fork_off = xfs_inode_fork_boff ( ip ) ;
2010-01-14 01:33:55 +00:00
) ,
2021-11-16 07:28:40 +00:00
TP_printk ( " dev %d:%d ino 0x%llx (%s), %s format, num_extents %llu, "
2021-08-17 17:09:12 +00:00
" broot size %d, forkoff 0x%x " ,
2010-01-14 01:33:55 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_symbolic ( __entry - > which , XFS_SWAPEXT_INODES ) ,
__print_symbolic ( __entry - > format , XFS_INODE_FORMAT_STR ) ,
__entry - > nex ,
__entry - > broot_size ,
__entry - > fork_off )
)
/*
 * Instantiate a tracepoint called @name from xfs_swap_extent_class; the
 * prototype must match the class (ip, which).
 */
# define DEFINE_SWAPEXT_EVENT(name) \
DEFINE_EVENT ( xfs_swap_extent_class , name , \
TP_PROTO ( struct xfs_inode * ip , int which ) , \
TP_ARGS ( ip , which ) )
/* Events that share the xfs_swap_extent_class record layout. */
DEFINE_SWAPEXT_EVENT ( xfs_swap_extent_before ) ;
DEFINE_SWAPEXT_EVENT ( xfs_swap_extent_after ) ;
2017-08-09 01:21:53 +00:00
/*
 * Standalone event recording the head and tail block addresses handed to
 * log recovery.
 *
 * The device comes from the log's mount (log->l_mp); headblk and tailblk
 * are stored as passed in and printed in hexadecimal.
 */
TRACE_EVENT ( xfs_log_recover ,
TP_PROTO ( struct xlog * log , xfs_daddr_t headblk , xfs_daddr_t tailblk ) ,
TP_ARGS ( log , headblk , tailblk ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_daddr_t , headblk )
__field ( xfs_daddr_t , tailblk )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > headblk = headblk ;
__entry - > tailblk = tailblk ;
) ,
TP_printk ( " dev %d:%d headblk 0x%llx tailblk 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) , __entry - > headblk ,
__entry - > tailblk )
)
2016-09-25 22:34:52 +00:00
/*
 * Standalone event recording one log record header seen during recovery.
 *
 * The header fields are stored big-endian in struct xlog_rec_header and are
 * converted to CPU byte order here: h_lsn (64 bit), h_len and h_num_logops
 * (32 bit each).  @pass is stored as given by the caller.
 */
TRACE_EVENT ( xfs_log_recover_record ,
TP_PROTO ( struct xlog * log , struct xlog_rec_header * rhead , int pass ) ,
TP_ARGS ( log , rhead , pass ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_lsn_t , lsn )
__field ( int , len )
__field ( int , num_logops )
__field ( int , pass )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > lsn = be64_to_cpu ( rhead - > h_lsn ) ;
__entry - > len = be32_to_cpu ( rhead - > h_len ) ;
__entry - > num_logops = be32_to_cpu ( rhead - > h_num_logops ) ;
__entry - > pass = pass ;
) ,
TP_printk ( " dev %d:%d lsn 0x%llx len 0x%x num_logops 0x%x pass %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > lsn , __entry - > len , __entry - > num_logops ,
__entry - > pass )
)
2010-04-13 05:06:46 +00:00
/*
 * Event class describing one recovery item within a recovered transaction.
 *
 * From @trans it records r_log_tid and r_lsn; from @item it records the
 * item type (ITEM_TYPE(), printed via XFS_LI_TYPE_DESC) and the ri_cnt /
 * ri_total region counters.  The item pointer itself is stored as an
 * unsigned long and cast back to a pointer only for %p printing.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_item_class ,
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xlog_recover * trans ,
2010-04-13 05:06:46 +00:00
struct xlog_recover_item * item , int pass ) ,
TP_ARGS ( log , trans , item , pass ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( unsigned long , item )
__field ( xlog_tid_t , tid )
2016-09-25 22:34:52 +00:00
__field ( xfs_lsn_t , lsn )
2010-04-13 05:06:46 +00:00
__field ( int , type )
__field ( int , pass )
__field ( int , count )
__field ( int , total )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > item = ( unsigned long ) item ;
__entry - > tid = trans - > r_log_tid ;
2016-09-25 22:34:52 +00:00
__entry - > lsn = trans - > r_lsn ;
2010-04-13 05:06:46 +00:00
__entry - > type = ITEM_TYPE ( item ) ;
__entry - > pass = pass ;
__entry - > count = item - > ri_cnt ;
__entry - > total = item - > ri_total ;
) ,
2018-01-09 19:43:36 +00:00
TP_printk ( " dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, "
2016-09-25 22:34:52 +00:00
" item type %s item region count/total %d/%d " ,
2010-04-13 05:06:46 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > tid ,
2016-09-25 22:34:52 +00:00
__entry - > lsn ,
2010-04-13 05:06:46 +00:00
__entry - > pass ,
( void * ) __entry - > item ,
__print_symbolic ( __entry - > type , XFS_LI_TYPE_DESC ) ,
__entry - > count ,
__entry - > total )
)
/*
 * Instantiate a tracepoint called @name from xfs_log_recover_item_class;
 * the prototype must match the class (log, trans, item, pass).
 */
# define DEFINE_LOG_RECOVER_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_item_class , name , \
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xlog_recover * trans , \
2010-04-13 05:06:46 +00:00
struct xlog_recover_item * item , int pass ) , \
TP_ARGS ( log , trans , item , pass ) )
/* Events that share the xfs_log_recover_item_class record layout. */
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_add ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_add_cont ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_reorder_head ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_reorder_tail ) ;
DEFINE_LOG_RECOVER_ITEM ( xfs_log_recover_item_recover ) ;
/*
 * Event class describing a buffer log format structure seen during recovery.
 *
 * Copies blf_blkno, blf_len, blf_flags, blf_size and blf_map_size straight
 * out of @buf_f.  blf_blkno is printed as "daddr" and blf_len as "bbcount",
 * both in hexadecimal.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_buf_item_class ,
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xfs_buf_log_format * buf_f ) ,
2010-04-13 05:06:46 +00:00
TP_ARGS ( log , buf_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
2017-06-16 18:00:05 +00:00
__field ( int64_t , blkno )
2010-04-13 05:06:46 +00:00
__field ( unsigned short , len )
__field ( unsigned short , flags )
__field ( unsigned short , size )
__field ( unsigned int , map_size )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > blkno = buf_f - > blf_blkno ;
__entry - > len = buf_f - > blf_len ;
__entry - > flags = buf_f - > blf_flags ;
__entry - > size = buf_f - > blf_size ;
__entry - > map_size = buf_f - > blf_map_size ;
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d daddr 0x%llx, bbcount 0x%x, flags 0x%x, size %d, "
2010-04-13 05:06:46 +00:00
" map_size %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > blkno ,
__entry - > len ,
__entry - > flags ,
__entry - > size ,
__entry - > map_size )
)
/*
 * Instantiate a tracepoint called @name from xfs_log_recover_buf_item_class;
 * the prototype must match the class (log, buf_f).
 */
# define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_buf_item_class , name , \
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xfs_buf_log_format * buf_f ) , \
2010-04-13 05:06:46 +00:00
TP_ARGS ( log , buf_f ) )
/* Events that share the xfs_log_recover_buf_item_class record layout. */
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_not_cancel ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel_add ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_cancel_ref_inc ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_recover ) ;
2016-09-25 22:34:52 +00:00
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_skip ) ;
2010-04-13 05:06:46 +00:00
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_inode_buf ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_reg_buf ) ;
DEFINE_LOG_RECOVER_BUF_ITEM ( xfs_log_recover_buf_dquot_buf ) ;
/*
 * Event class describing an inode log format structure seen during recovery.
 *
 * Copies ilf_ino, ilf_size, ilf_fields, ilf_asize, ilf_dsize, ilf_blkno,
 * ilf_len and ilf_boffset straight out of @in_f.  ilf_blkno is printed as
 * "daddr" and ilf_len as "bbcount", both in hexadecimal.
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_ino_item_class ,
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xfs_inode_log_format * in_f ) ,
2010-04-13 05:06:46 +00:00
TP_ARGS ( log , in_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( unsigned short , size )
__field ( int , fields )
__field ( unsigned short , asize )
__field ( unsigned short , dsize )
2017-06-16 18:00:05 +00:00
__field ( int64_t , blkno )
2010-04-13 05:06:46 +00:00
__field ( int , len )
__field ( int , boffset )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > ino = in_f - > ilf_ino ;
__entry - > size = in_f - > ilf_size ;
__entry - > fields = in_f - > ilf_fields ;
__entry - > asize = in_f - > ilf_asize ;
__entry - > dsize = in_f - > ilf_dsize ;
__entry - > blkno = in_f - > ilf_blkno ;
__entry - > len = in_f - > ilf_len ;
__entry - > boffset = in_f - > ilf_boffset ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
2021-08-17 19:45:59 +00:00
" dsize %d, daddr 0x%llx, bbcount 0x%x, boffset %d " ,
2010-04-13 05:06:46 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > size ,
__entry - > fields ,
__entry - > asize ,
__entry - > dsize ,
__entry - > blkno ,
__entry - > len ,
__entry - > boffset )
)
/*
 * Instantiate a tracepoint called @name from xfs_log_recover_ino_item_class;
 * the prototype must match the class (log, in_f).
 */
# define DEFINE_LOG_RECOVER_INO_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_ino_item_class , name , \
2012-06-14 14:22:15 +00:00
TP_PROTO ( struct xlog * log , struct xfs_inode_log_format * in_f ) , \
2010-04-13 05:06:46 +00:00
TP_ARGS ( log , in_f ) )
/* Events that share the xfs_log_recover_ino_item_class record layout. */
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_recover ) ;
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_cancel ) ;
DEFINE_LOG_RECOVER_INO_ITEM ( xfs_log_recover_inode_skip ) ;
2015-08-18 23:58:48 +00:00
/*
 * Event class describing an inode-create log item seen during recovery.
 *
 * Every field of struct xfs_icreate_log used here is big-endian and is
 * converted with be32_to_cpu(): icl_ag, icl_agbno, icl_count, icl_isize,
 * icl_length and icl_gen.
 *
 * Note that the print order differs from the field order: icl_length is
 * printed as "fsbcount" before icl_count, which is printed as "ireccount".
 */
DECLARE_EVENT_CLASS ( xfs_log_recover_icreate_item_class ,
TP_PROTO ( struct xlog * log , struct xfs_icreate_log * in_f ) ,
TP_ARGS ( log , in_f ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( unsigned int , count )
__field ( unsigned int , isize )
__field ( xfs_agblock_t , length )
__field ( unsigned int , gen )
) ,
TP_fast_assign (
__entry - > dev = log - > l_mp - > m_super - > s_dev ;
__entry - > agno = be32_to_cpu ( in_f - > icl_ag ) ;
__entry - > agbno = be32_to_cpu ( in_f - > icl_agbno ) ;
__entry - > count = be32_to_cpu ( in_f - > icl_count ) ;
__entry - > isize = be32_to_cpu ( in_f - > icl_isize ) ;
__entry - > length = be32_to_cpu ( in_f - > icl_length ) ;
__entry - > gen = be32_to_cpu ( in_f - > icl_gen ) ;
) ,
2021-08-17 22:45:25 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x ireccount %u isize %u gen 0x%x " ,
2021-08-17 19:45:59 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > length ,
__entry - > count ,
__entry - > isize ,
__entry - > gen )
2015-08-18 23:58:48 +00:00
)
/*
 * Instantiate a tracepoint called @name from
 * xfs_log_recover_icreate_item_class; the prototype must match the class
 * (log, in_f).
 */
# define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \
DEFINE_EVENT ( xfs_log_recover_icreate_item_class , name , \
TP_PROTO ( struct xlog * log , struct xfs_icreate_log * in_f ) , \
TP_ARGS ( log , in_f ) )
/* Events that share the xfs_log_recover_icreate_item_class record layout. */
DEFINE_LOG_RECOVER_ICREATE_ITEM ( xfs_log_recover_icreate_cancel ) ;
DEFINE_LOG_RECOVER_ICREATE_ITEM ( xfs_log_recover_icreate_recover ) ;
2011-01-07 13:02:04 +00:00
/*
 * Event class describing an extent within a struct xfs_group that is being
 * considered for discard.
 *
 * The device comes from the group's mount (xg->xg_mount).  The entry field
 * named agno holds the generic group number xg->xg_gno, and the group type
 * (xg->xg_type, decoded via XG_TYPE_STRINGS) is printed as a prefix glued
 * onto "no" in the output.  @agbno and @len are stored as passed in.
 */
DECLARE_EVENT_CLASS ( xfs_discard_class ,
2024-11-04 04:19:35 +00:00
TP_PROTO ( const struct xfs_group * xg , xfs_agblock_t agbno ,
2024-11-04 04:18:36 +00:00
xfs_extlen_t len ) ,
2024-11-04 04:19:35 +00:00
TP_ARGS ( xg , agbno , len ) ,
2011-01-07 13:02:04 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:19:35 +00:00
__field ( enum xfs_group_type , type )
2011-01-07 13:02:04 +00:00
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
2024-11-04 04:19:35 +00:00
__entry - > dev = xg - > xg_mount - > m_super - > s_dev ;
__entry - > type = xg - > xg_type ;
__entry - > agno = xg - > xg_gno ;
2011-01-07 13:02:04 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
) ,
2024-11-04 04:19:35 +00:00
TP_printk ( " dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x " ,
2011-01-07 13:02:04 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-11-04 04:19:35 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2011-01-07 13:02:04 +00:00
__entry - > agno ,
__entry - > agbno ,
__entry - > len )
)
/*
 * Instantiate a tracepoint called @name from xfs_discard_class; the
 * prototype must match the class (xg, agbno, len).
 */
# define DEFINE_DISCARD_EVENT(name) \
DEFINE_EVENT ( xfs_discard_class , name , \
2024-11-04 04:19:35 +00:00
TP_PROTO ( const struct xfs_group * xg , xfs_agblock_t agbno , \
xfs_extlen_t len ) , \
TP_ARGS ( xg , agbno , len ) )
2011-01-07 13:02:04 +00:00
/* Events that share the xfs_discard_class record layout. */
DEFINE_DISCARD_EVENT ( xfs_discard_extent ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_toosmall ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_exclude ) ;
DEFINE_DISCARD_EVENT ( xfs_discard_busy ) ;
2024-06-24 15:04:21 +00:00
/*
 * Event class describing a block range (rtbno, len) being considered for
 * discard.
 *
 * Unlike the other classes in this section, the device number is taken
 * from mp->m_rtdev_targp->bt_dev rather than from the superblock.  Both
 * rtbno and len are xfs_rtblock_t values printed in hexadecimal.
 */
DECLARE_EVENT_CLASS ( xfs_rtdiscard_class ,
TP_PROTO ( struct xfs_mount * mp ,
xfs_rtblock_t rtbno , xfs_rtblock_t len ) ,
TP_ARGS ( mp , rtbno , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_rtblock_t , rtbno )
__field ( xfs_rtblock_t , len )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_rtdev_targp - > bt_dev ;
__entry - > rtbno = rtbno ;
__entry - > len = len ;
) ,
TP_printk ( " dev %d:%d rtbno 0x%llx rtbcount 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > rtbno ,
__entry - > len )
)
/*
 * Instantiate a tracepoint called @name from xfs_rtdiscard_class; the
 * prototype must match the class (mp, rtbno, len).
 */
# define DEFINE_RTDISCARD_EVENT(name) \
DEFINE_EVENT ( xfs_rtdiscard_class , name , \
TP_PROTO ( struct xfs_mount * mp , \
xfs_rtblock_t rtbno , xfs_rtblock_t len ) , \
TP_ARGS ( mp , rtbno , len ) )
/* Events that share the xfs_rtdiscard_class record layout. */
DEFINE_RTDISCARD_EVENT ( xfs_discard_rtextent ) ;
DEFINE_RTDISCARD_EVENT ( xfs_discard_rttoosmall ) ;
DEFINE_RTDISCARD_EVENT ( xfs_discard_rtrelax ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
/*
 * Event class reporting a btree cursor's position at one level.
 *
 * Records the device, the btree name (cur->bc_ops->name, stored as a
 * dynamic string), the requested level, the cursor's level count
 * (cur->bc_nlevels), the cursor's ptr value at that level
 * (cur->bc_levels[level].ptr) and the disk address of @bp, or -1 when @bp
 * is NULL.
 */
DECLARE_EVENT_CLASS ( xfs_btree_cur_class ,
TP_PROTO ( struct xfs_btree_cur * cur , int level , struct xfs_buf * bp ) ,
TP_ARGS ( cur , level , bp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-02-22 20:39:47 +00:00
__string ( name , cur - > bc_ops - > name )
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
__field ( int , level )
__field ( int , nlevels )
__field ( int , ptr )
__field ( xfs_daddr_t , daddr )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
__entry - > level = level ;
__entry - > nlevels = cur - > bc_nlevels ;
2021-09-16 19:24:04 +00:00
__entry - > ptr = cur - > bc_levels [ level ] . ptr ;
2021-08-19 01:47:05 +00:00
__entry - > daddr = bp ? xfs_buf_daddr ( bp ) : - 1 ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
) ,
2024-02-22 20:39:47 +00:00
TP_printk ( " dev %d:%d %sbt level %d/%d ptr %d daddr 0x%llx " ,
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:39:47 +00:00
__get_str ( name ) ,
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
__entry - > level ,
__entry - > nlevels ,
__entry - > ptr ,
( unsigned long long ) __entry - > daddr )
)
/*
 * Instantiate a tracepoint called @name from xfs_btree_cur_class; the
 * prototype must match the class (cur, level, bp).
 */
# define DEFINE_BTREE_CUR_EVENT(name) \
DEFINE_EVENT ( xfs_btree_cur_class , name , \
TP_PROTO ( struct xfs_btree_cur * cur , int level , struct xfs_buf * bp ) , \
TP_ARGS ( cur , level , bp ) )
/* Events that share the xfs_btree_cur_class record layout. */
DEFINE_BTREE_CUR_EVENT ( xfs_btree_updkeys ) ;
2016-08-03 01:10:21 +00:00
DEFINE_BTREE_CUR_EVENT ( xfs_btree_overlapped_query_range ) ;
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's
possible to have multiple rmap records referring to the same blocks on
disk. When overlapping intervals are possible, querying a classic
btree to find all records intersecting a given interval is inefficient
because we cannot use the left side of the search interval to filter
out non-matching records the same way that we can use the existing
btree key to filter out records coming after the right side of the
search interval. This will become important once we want to use the
rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl.
(For the non-overlapping case, we can perform such queries trivially
by starting at the left side of the interval and walking the tree
until we pass the right side.)
Therefore, extend the btree code to come closer to supporting
intervals as a first-class record attribute. This involves widening
the btree node's key space to store both the lowest key reachable via
the node pointer (as the btree does now) and the highest key reachable
via the same pointer and teaching the btree modifying functions to
keep the highest-key records up to date.
This behavior can be turned on via a new btree ops flag so that btrees
that cannot store overlapping intervals don't pay the overhead costs
in terms of extra code and disk format changes.
When we're deleting a record in a btree that supports overlapped
interval records and the deletion results in two btree blocks being
joined, we defer updating the high/low keys until after all possible
joining (at higher levels in the tree) have finished. At this point,
the btree pointers at all levels have been updated to remove the empty
blocks and we can update the low and high keys.
When we're doing this, we must be careful to update the keys of all
node pointers up to the root instead of stopping at the first set of
keys that don't need updating. This is because it's possible for a
single deletion to cause joining of multiple levels of tree, and so
we need to update everything going back to the root.
The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than,
equal to, or greater than key2, respectively. This is consistent
with the rest of the kernel and the C library.
In btree_updkeys(), we need to evaluate the force_all parameter before
running the key diff to avoid reading uninitialized memory when we're
forcing a key update. This happens when we've allocated an empty slot
at level N + 1 to point to a new block at level N and we're in the
process of filling out the new keys.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 01:08:36 +00:00
2024-02-22 20:33:07 +00:00
/*
 * Standalone event tracing the outcome of a btree block allocation.
 *
 * Records the device, the btree name (cur->bc_ops->name) and the error
 * code.  The owner is identified according to cur->bc_ops->type: the inode
 * number for XFS_BTREE_TYPE_INODE, the group number
 * (cur->bc_group->xg_gno) for XFS_BTREE_TYPE_AG, and neither for
 * XFS_BTREE_TYPE_MEM.
 *
 * When error is zero and stat is nonzero, the new block is decoded from
 * @ptr: a long pointer is split into agno/agbno with XFS_FSB_TO_AGNO() and
 * XFS_FSB_TO_AGBNO(), while a short pointer supplies only agbno.  In every
 * other case agbno is set to NULLAGBLOCK.
 */
TRACE_EVENT ( xfs_btree_alloc_block ,
TP_PROTO ( struct xfs_btree_cur * cur , union xfs_btree_ptr * ptr , int stat ,
int error ) ,
TP_ARGS ( cur , ptr , stat , error ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_ino_t , ino )
2024-02-22 20:39:47 +00:00
__string ( name , cur - > bc_ops - > name )
2024-02-22 20:33:07 +00:00
__field ( int , error )
__field ( xfs_agblock_t , agbno )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-02-22 20:43:35 +00:00
/* Pick the owner fields (agno/ino) based on the kind of btree. */
switch ( cur - > bc_ops - > type ) {
case XFS_BTREE_TYPE_INODE :
2024-02-22 20:33:07 +00:00
__entry - > agno = 0 ;
__entry - > ino = cur - > bc_ino . ip - > i_ino ;
2024-02-22 20:43:35 +00:00
break ;
case XFS_BTREE_TYPE_AG :
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2024-02-22 20:33:07 +00:00
__entry - > ino = 0 ;
2024-02-22 20:43:35 +00:00
break ;
case XFS_BTREE_TYPE_MEM :
__entry - > agno = 0 ;
__entry - > ino = 0 ;
break ;
2024-02-22 20:33:07 +00:00
}
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
2024-02-22 20:33:07 +00:00
__entry - > error = error ;
/* Only decode @ptr when the allocation reported success. */
if ( ! error & & stat ) {
2024-02-22 20:35:36 +00:00
if ( cur - > bc_ops - > ptr_len = = XFS_BTREE_LONG_PTR_LEN ) {
2024-02-22 20:33:07 +00:00
xfs_fsblock_t fsb = be64_to_cpu ( ptr - > l ) ;
/* Long pointers replace the agno chosen by the switch above. */
__entry - > agno = XFS_FSB_TO_AGNO ( cur - > bc_mp ,
fsb ) ;
__entry - > agbno = XFS_FSB_TO_AGBNO ( cur - > bc_mp ,
fsb ) ;
} else {
__entry - > agbno = be32_to_cpu ( ptr - > s ) ;
}
} else {
__entry - > agbno = NULLAGBLOCK ;
}
) ,
2024-02-22 20:39:47 +00:00
TP_printk ( " dev %d:%d %sbt agno 0x%x ino 0x%llx agbno 0x%x error %d " ,
2024-02-22 20:33:07 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:39:47 +00:00
__get_str ( name ) ,
2024-02-22 20:33:07 +00:00
__entry - > agno ,
__entry - > ino ,
__entry - > agbno ,
__entry - > error )
) ;
2024-02-22 20:33:06 +00:00
/*
 * Tracepoint xfs_btree_free_block: logs the btree name and the location of
 * the btree buffer @bp for cursor @cur.
 *
 * agno and agbno are both derived from the buffer's disk address
 * (xfs_buf_daddr(bp)), regardless of the btree type.  ino is filled in only
 * for inode-rooted btrees (XFS_BTREE_TYPE_INODE) and is 0 otherwise.
 */
TRACE_EVENT ( xfs_btree_free_block ,
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_buf * bp ) ,
TP_ARGS ( cur , bp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_ino_t , ino )
2024-02-22 20:39:47 +00:00
__string ( name , cur - > bc_ops - > name )
2024-02-22 20:33:06 +00:00
__field ( xfs_agblock_t , agbno )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
__entry - > agno = xfs_daddr_to_agno ( cur - > bc_mp ,
xfs_buf_daddr ( bp ) ) ;
/* Only inode-rooted btrees have an owning inode to report. */
2024-02-22 20:36:17 +00:00
if ( cur - > bc_ops - > type = = XFS_BTREE_TYPE_INODE )
2024-02-22 20:33:06 +00:00
__entry - > ino = cur - > bc_ino . ip - > i_ino ;
else
__entry - > ino = 0 ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
2024-02-22 20:33:06 +00:00
__entry - > agbno = xfs_daddr_to_agbno ( cur - > bc_mp ,
xfs_buf_daddr ( bp ) ) ;
) ,
2024-02-22 20:39:47 +00:00
TP_printk ( " dev %d:%d %sbt agno 0x%x ino 0x%llx agbno 0x%x " ,
2024-02-22 20:33:06 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:39:47 +00:00
__get_str ( name ) ,
2024-02-22 20:33:06 +00:00
__entry - > agno ,
__entry - > ino ,
__entry - > agbno )
) ;
2016-08-03 01:13:02 +00:00
/* deferred ops */
struct xfs_defer_pending ;
/*
 * Event class for deferred-ops tracepoints that are keyed on a transaction.
 *
 * @tp:        transaction the deferred work belongs to; its mount supplies
 *             the device number
 * @caller_ip: address of the caller, printed symbolically via %pS
 *
 * Every field declared in TP_STRUCT__entry is assigned in TP_fast_assign, so
 * the record carries no field whose contents are left unset.
 */
DECLARE_EVENT_CLASS ( xfs_defer_class ,
2018-08-01 14:20:35 +00:00
TP_PROTO ( struct xfs_trans * tp , unsigned long caller_ip ) ,
TP_ARGS ( tp , caller_ip ) ,
2016-08-03 01:13:02 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2018-08-01 14:20:35 +00:00
__field ( struct xfs_trans * , tp )
2018-05-09 14:48:52 +00:00
__field ( unsigned long , caller_ip )
2016-08-03 01:13:02 +00:00
) ,
TP_fast_assign (
2018-08-01 14:20:35 +00:00
__entry - > dev = tp - > t_mountp - > m_super - > s_dev ;
__entry - > tp = tp ;
2018-05-09 14:48:52 +00:00
__entry - > caller_ip = caller_ip ;
2016-08-03 01:13:02 +00:00
) ,
2018-08-01 14:20:35 +00:00
TP_printk ( " dev %d:%d tp %p caller %pS " ,
2016-08-03 01:13:02 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2018-08-01 14:20:35 +00:00
__entry - > tp ,
2018-05-09 14:48:52 +00:00
( char * ) __entry - > caller_ip )
2016-08-03 01:13:02 +00:00
)
/* Instantiate tracepoint @name from xfs_defer_class (args: tp, caller_ip). */
# define DEFINE_DEFER_EVENT(name) \
DEFINE_EVENT ( xfs_defer_class , name , \
2018-08-01 14:20:35 +00:00
TP_PROTO ( struct xfs_trans * tp , unsigned long caller_ip ) , \
TP_ARGS ( tp , caller_ip ) )
2016-08-03 01:13:02 +00:00
/*
 * Event class for deferred-ops error tracepoints keyed on a transaction.
 *
 * @tp:    transaction the deferred work belongs to; its mount supplies the
 *         device number
 * @error: error code to log
 *
 * Every field declared in TP_STRUCT__entry is assigned in TP_fast_assign, so
 * the record carries no field whose contents are left unset.
 */
DECLARE_EVENT_CLASS ( xfs_defer_error_class ,
2018-08-01 14:20:35 +00:00
TP_PROTO ( struct xfs_trans * tp , int error ) ,
TP_ARGS ( tp , error ) ,
2016-08-03 01:13:02 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2018-08-01 14:20:35 +00:00
__field ( struct xfs_trans * , tp )
2016-08-03 01:13:02 +00:00
__field ( int , error )
) ,
TP_fast_assign (
2018-08-01 14:20:35 +00:00
__entry - > dev = tp - > t_mountp - > m_super - > s_dev ;
__entry - > tp = tp ;
2016-08-03 01:13:02 +00:00
__entry - > error = error ;
) ,
2018-08-01 14:20:35 +00:00
TP_printk ( " dev %d:%d tp %p err %d " ,
2016-08-03 01:13:02 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2018-08-01 14:20:35 +00:00
__entry - > tp ,
2016-08-03 01:13:02 +00:00
__entry - > error )
)
/* Instantiate tracepoint @name from xfs_defer_error_class (args: tp, error). */
# define DEFINE_DEFER_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_defer_error_class , name , \
2018-08-01 14:20:35 +00:00
TP_PROTO ( struct xfs_trans * tp , int error ) , \
TP_ARGS ( tp , error ) )
2016-08-03 01:13:02 +00:00
/*
 * Event class for tracepoints describing one pending deferred work item
 * (struct xfs_defer_pending).
 *
 * Logged fields:
 *   name      - op type name, copied from dfp->dfp_ops->name
 *   intent    - dfp->dfp_intent pointer
 *   flags     - dfp->dfp_flags, decoded with XFS_DEFER_PENDING_STRINGS
 *   committed - 1 if dfp->dfp_done is non-NULL, else 0
 *   nr        - dfp->dfp_count
 *
 * @mp may be NULL, in which case the device is logged as 0.
 */
DECLARE_EVENT_CLASS ( xfs_defer_pending_class ,
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp ) ,
TP_ARGS ( mp , dfp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
2023-12-13 09:06:31 +00:00
__string ( name , dfp - > dfp_ops - > name )
2016-08-03 01:13:02 +00:00
__field ( void * , intent )
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattach to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__field ( unsigned int , flags )
2017-04-21 18:24:42 +00:00
__field ( char , committed )
2016-08-03 01:13:02 +00:00
__field ( int , nr )
) ,
TP_fast_assign (
/* Tolerate a NULL mount: fall back to device number 0. */
__entry - > dev = mp ? mp - > m_super - > s_dev : 0 ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
2016-08-03 01:13:02 +00:00
__entry - > intent = dfp - > dfp_intent ;
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattach to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__entry - > flags = dfp - > dfp_flags ;
/* "committed" records whether a done item is attached to this work item. */
2016-08-30 03:51:39 +00:00
__entry - > committed = dfp - > dfp_done ! = NULL ;
2016-08-03 01:13:02 +00:00
__entry - > nr = dfp - > dfp_count ;
) ,
2023-12-13 09:06:31 +00:00
TP_printk ( " dev %d:%d optype %s intent %p flags %s committed %d nr %d " ,
2016-08-03 01:13:02 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2023-12-13 09:06:31 +00:00
__get_str ( name ) ,
2016-08-03 01:13:02 +00:00
__entry - > intent ,
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattach to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__print_flags ( __entry - > flags , " | " , XFS_DEFER_PENDING_STRINGS ) ,
2016-08-03 01:13:02 +00:00
__entry - > committed ,
__entry - > nr )
)
/* Instantiate tracepoint @name from xfs_defer_pending_class (args: mp, dfp). */
# define DEFINE_DEFER_PENDING_EVENT(name) \
DEFINE_EVENT ( xfs_defer_pending_class , name , \
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp ) , \
TP_ARGS ( mp , dfp ) )
2024-02-22 20:43:43 +00:00
/* Per-transaction deferred-ops events (xfs_defer_class). */
DEFINE_DEFER_EVENT ( xfs_defer_cancel ) ;
DEFINE_DEFER_EVENT ( xfs_defer_trans_roll ) ;
DEFINE_DEFER_EVENT ( xfs_defer_trans_abort ) ;
DEFINE_DEFER_EVENT ( xfs_defer_finish ) ;
DEFINE_DEFER_EVENT ( xfs_defer_finish_done ) ;
/* Per-transaction deferred-ops error events (xfs_defer_error_class). */
DEFINE_DEFER_ERROR_EVENT ( xfs_defer_trans_roll_error ) ;
DEFINE_DEFER_ERROR_EVENT ( xfs_defer_finish_error ) ;
/* Per-work-item events (xfs_defer_pending_class). */
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_create_intent ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_cancel_list ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_pending_finish ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_pending_abort ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_relog_intent ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_isolate_paused ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_item_pause ) ;
DEFINE_DEFER_PENDING_EVENT ( xfs_defer_item_unpause ) ;
/*
 * Event class for tracepoints on a deferred extent-free work item
 * (struct xfs_extent_free_item).
 *
 * Logged fields:
 *   type  - free->xefi_group->xg_type; printed via XG_TYPE_STRINGS as the
 *           prefix of "no" in the output
 *   agno  - free->xefi_group->xg_gno
 *   agbno - free->xefi_startblock passed through xfs_fsb_to_gbno() for the
 *           group's type; printed under the label "gbno"
 *   len   - free->xefi_blockcount
 *   flags - free->xefi_flags
 */
DECLARE_EVENT_CLASS ( xfs_free_extent_deferred_class ,
2024-07-02 18:22:50 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_extent_free_item * free ) ,
TP_ARGS ( mp , free ) ,
2016-08-03 01:13:02 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:19:26 +00:00
__field ( enum xfs_group_type , type )
2016-08-03 01:13:02 +00:00
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
2024-07-02 18:22:50 +00:00
__field ( unsigned int , flags )
2016-08-03 01:13:02 +00:00
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
2024-11-04 04:19:26 +00:00
__entry - > type = free - > xefi_group - > xg_type ;
__entry - > agno = free - > xefi_group - > xg_gno ;
__entry - > agbno = xfs_fsb_to_gbno ( mp , free - > xefi_startblock ,
free - > xefi_group - > xg_type ) ;
2024-07-02 18:22:50 +00:00
__entry - > len = free - > xefi_blockcount ;
__entry - > flags = free - > xefi_flags ;
2016-08-03 01:13:02 +00:00
) ,
2024-11-04 04:19:26 +00:00
TP_printk ( " dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x flags 0x%x " ,
2016-08-03 01:13:02 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-11-04 04:19:26 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2016-08-03 01:13:02 +00:00
__entry - > agno ,
__entry - > agbno ,
2024-07-02 18:22:50 +00:00
__entry - > len ,
__entry - > flags )
2016-08-03 01:13:02 +00:00
) ;
2024-02-22 20:43:43 +00:00
/* Instantiate tracepoint @name from xfs_free_extent_deferred_class (args: mp, free). */
# define DEFINE_FREE_EXTENT_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_free_extent_deferred_class , name , \
2024-07-02 18:22:50 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_extent_free_item * free ) , \
TP_ARGS ( mp , free ) )
2024-02-22 20:43:43 +00:00
/* Tracepoints instantiated from xfs_free_extent_deferred_class. */
DEFINE_FREE_EXTENT_DEFERRED_EVENT ( xfs_agfl_free_deferred ) ;
2024-07-02 18:22:50 +00:00
DEFINE_FREE_EXTENT_DEFERRED_EVENT ( xfs_extent_free_defer ) ;
DEFINE_FREE_EXTENT_DEFERRED_EVENT ( xfs_extent_free_deferred ) ;
2016-08-03 01:26:33 +00:00
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
DECLARE_EVENT_CLASS ( xfs_defer_pending_item_class ,
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp ,
void * item ) ,
TP_ARGS ( mp , dfp , item ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
2023-12-13 09:06:31 +00:00
__string ( name , dfp - > dfp_ops - > name )
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__field ( void * , intent )
__field ( void * , item )
__field ( char , committed )
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattached to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__field ( unsigned int , flags )
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__field ( int , nr )
) ,
TP_fast_assign (
__entry - > dev = mp ? mp - > m_super - > s_dev : 0 ;
2024-05-16 17:34:54 +00:00
__assign_str ( name ) ;
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > intent = dfp - > dfp_intent ;
__entry - > item = item ;
__entry - > committed = dfp - > dfp_done ! = NULL ;
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattached to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__entry - > flags = dfp - > dfp_flags ;
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > nr = dfp - > dfp_count ;
) ,
2023-12-13 09:06:31 +00:00
TP_printk ( " dev %d:%d optype %s intent %p item %p flags %s committed %d nr %d " ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2023-12-13 09:06:31 +00:00
__get_str ( name ) ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > intent ,
__entry - > item ,
xfs: allow pausing of pending deferred work items
Traditionally, all pending deferred work attached to a transaction is
finished when one of the xfs_defer_finish* functions is called.
However, online repair wants to be able to allocate space for a new data
structure, format a new metadata structure into the allocated space, and
commit that into the filesystem.
As a hedge against system crashes during repairs, we also want to log
some EFI items for the allocated space speculatively, and cancel them if
we elect to commit the new data structure.
Therefore, introduce the idea of pausing a pending deferred work item.
Log intent items are still created for paused items and relogged as
necessary. However, paused items are pushed onto a side list before we
start calling ->finish_item, and the whole list is reattached to the
transaction afterwards. New work items are never attached to paused
pending items.
Modify xfs_defer_cancel to clean up pending deferred work items holding
a log intent item but not a log intent done item, since that is now
possible.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2023-12-07 02:40:56 +00:00
__print_flags ( __entry - > flags , " | " , XFS_DEFER_PENDING_STRINGS ) ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > committed ,
__entry - > nr )
)
/*
 * Define a tracepoint called "name" that belongs to
 * xfs_defer_pending_item_class.  Every such event takes the mount, the
 * pending deferred work structure, and an untyped pointer to one work item.
 */
# define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \
DEFINE_EVENT ( xfs_defer_pending_item_class , name , \
TP_PROTO ( struct xfs_mount * mp , struct xfs_defer_pending * dfp , \
void * item ) , \
TP_ARGS ( mp , dfp , item ) )
/* Per-work-item events; the suffix names the step being traced. */
DEFINE_DEFER_PENDING_ITEM_EVENT ( xfs_defer_add_item ) ;
DEFINE_DEFER_PENDING_ITEM_EVENT ( xfs_defer_cancel_item ) ;
DEFINE_DEFER_PENDING_ITEM_EVENT ( xfs_defer_finish_item ) ;
2016-08-03 01:33:43 +00:00
/* rmap tracepoints */

/*
 * Event class for reverse-mapping operations on one extent.
 *
 * Records the device, the group number read from the btree cursor, the
 * starting block and length of the extent, and the owner, offset and flags
 * copied from the owner info.  If "unwritten" is true, XFS_RMAP_UNWRITTEN is
 * OR-ed into the recorded flags.
 */
DECLARE_EVENT_CLASS ( xfs_rmap_class ,
2024-07-02 18:22:58 +00:00
TP_PROTO ( struct xfs_btree_cur * cur ,
2016-08-03 01:33:43 +00:00
xfs_agblock_t agbno , xfs_extlen_t len , bool unwritten ,
2018-12-12 16:46:23 +00:00
const struct xfs_owner_info * oinfo ) ,
2024-07-02 18:22:58 +00:00
TP_ARGS ( cur , agbno , len , unwritten , oinfo ) ,
2016-08-03 01:33:43 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( uint64_t , owner )
__field ( uint64_t , offset )
__field ( unsigned long , flags )
) ,
TP_fast_assign (
2024-07-02 18:22:58 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2016-08-03 01:33:43 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > owner = oinfo - > oi_owner ;
__entry - > offset = oinfo - > oi_offset ;
__entry - > flags = oinfo - > oi_flags ;
2016-08-03 01:36:07 +00:00
/* fold the caller's unwritten state into the recorded flags */
if ( unwritten )
__entry - > flags | = XFS_RMAP_UNWRITTEN ;
2016-08-03 01:33:43 +00:00
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx " ,
2016-08-03 01:33:43 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
) ;
/*
 * Define a tracepoint called "name" in xfs_rmap_class.  The prototype
 * repeats the class prototype: cursor, extent start and length, unwritten
 * state, and owner info.
 */
# define DEFINE_RMAP_EVENT(name) \
DEFINE_EVENT ( xfs_rmap_class , name , \
2024-07-02 18:22:58 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , \
2016-08-03 01:33:43 +00:00
xfs_agblock_t agbno , xfs_extlen_t len , bool unwritten , \
2018-12-12 16:46:23 +00:00
const struct xfs_owner_info * oinfo ) , \
2024-07-02 18:22:58 +00:00
TP_ARGS ( cur , agbno , len , unwritten , oinfo ) )
2016-08-03 01:33:43 +00:00
2024-07-02 18:22:57 +00:00
/*
 * btree cursor error/%ip tracepoint class
 *
 * Records the device, an error code and the caller's instruction pointer,
 * plus an identifier that depends on the cursor's btree type:
 *   XFS_BTREE_TYPE_INODE: ino is the owning inode number, agno is 0
 *   XFS_BTREE_TYPE_AG:    agno is the cursor's group number, ino is 0
 *   XFS_BTREE_TYPE_MEM:   both agno and ino are 0
 */
DECLARE_EVENT_CLASS ( xfs_btree_error_class ,
TP_PROTO ( struct xfs_btree_cur * cur , int error ,
2016-08-03 01:33:43 +00:00
unsigned long caller_ip ) ,
2024-07-02 18:22:57 +00:00
TP_ARGS ( cur , error , caller_ip ) ,
2016-08-03 01:33:43 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2024-07-02 18:22:57 +00:00
__field ( xfs_ino_t , ino )
2016-08-03 01:33:43 +00:00
__field ( int , error )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
2024-07-02 18:22:57 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
/* pick the identifier that matches the kind of btree being walked */
switch ( cur - > bc_ops - > type ) {
case XFS_BTREE_TYPE_INODE :
__entry - > agno = 0 ;
__entry - > ino = cur - > bc_ino . ip - > i_ino ;
break ;
case XFS_BTREE_TYPE_AG :
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2024-07-02 18:22:57 +00:00
__entry - > ino = 0 ;
break ;
case XFS_BTREE_TYPE_MEM :
__entry - > agno = 0 ;
__entry - > ino = 0 ;
break ;
}
2016-08-03 01:33:43 +00:00
__entry - > error = error ;
__entry - > caller_ip = caller_ip ;
) ,
2024-07-02 18:22:57 +00:00
TP_printk ( " dev %d:%d agno 0x%x ino 0x%llx error %d caller %pS " ,
2016-08-03 01:33:43 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2024-07-02 18:22:57 +00:00
__entry - > ino ,
2016-08-03 01:33:43 +00:00
__entry - > error ,
( char * ) __entry - > caller_ip )
) ;
2024-07-02 18:22:57 +00:00
/*
 * Define a tracepoint called "name" in xfs_btree_error_class.  Callers pass
 * the btree cursor, the error code, and the caller's instruction pointer.
 */
# define DEFINE_BTREE_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_btree_error_class , name , \
TP_PROTO ( struct xfs_btree_cur * cur , int error , \
2016-08-03 01:33:43 +00:00
unsigned long caller_ip ) , \
2024-07-02 18:22:57 +00:00
TP_ARGS ( cur , error , caller_ip ) )
2016-08-03 01:33:43 +00:00
/*
 * rmap unmap, map and convert events.  Each operation has a plain event, a
 * "_done" event (both in xfs_rmap_class) and an "_error" event (in
 * xfs_btree_error_class).
 */
DEFINE_RMAP_EVENT ( xfs_rmap_unmap ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_unmap_done ) ;
2024-07-02 18:22:57 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_unmap_error ) ;
2016-08-03 01:33:43 +00:00
DEFINE_RMAP_EVENT ( xfs_rmap_map ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_map_done ) ;
2024-07-02 18:22:57 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_map_error ) ;
2016-08-03 02:03:19 +00:00
DEFINE_RMAP_EVENT ( xfs_rmap_convert ) ;
DEFINE_RMAP_EVENT ( xfs_rmap_convert_done ) ;
2024-07-02 18:22:57 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_convert_error ) ;
/*
 * Trace an integer "state" value together with the caller's instruction
 * pointer.  The agno/ino identifier is chosen from the cursor's btree type
 * in the same way as in xfs_btree_error_class: inode btrees record the inode
 * number, AG btrees record the group number, and XFS_BTREE_TYPE_MEM cursors
 * record zero for both.
 */
TRACE_EVENT ( xfs_rmap_convert_state ,
TP_PROTO ( struct xfs_btree_cur * cur , int state ,
unsigned long caller_ip ) ,
TP_ARGS ( cur , state , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_ino_t , ino )
__field ( int , state )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
switch ( cur - > bc_ops - > type ) {
case XFS_BTREE_TYPE_INODE :
__entry - > agno = 0 ;
__entry - > ino = cur - > bc_ino . ip - > i_ino ;
break ;
case XFS_BTREE_TYPE_AG :
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2024-07-02 18:22:57 +00:00
__entry - > ino = 0 ;
break ;
case XFS_BTREE_TYPE_MEM :
__entry - > agno = 0 ;
__entry - > ino = 0 ;
break ;
}
__entry - > state = state ;
__entry - > caller_ip = caller_ip ;
) ,
TP_printk ( " dev %d:%d agno 0x%x ino 0x%llx state %d caller %pS " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > ino ,
__entry - > state ,
( char * ) __entry - > caller_ip )
) ;
2016-08-03 01:33:43 +00:00
2016-08-03 01:43:24 +00:00
/*
 * Event class for a single reverse-mapping record.  Unlike xfs_rmap_class,
 * the owner, offset and flags are passed in directly instead of being read
 * from an owner info structure, and the flags are stored unmodified.
 */
DECLARE_EVENT_CLASS ( xfs_rmapbt_class ,
2024-07-02 18:22:58 +00:00
TP_PROTO ( struct xfs_btree_cur * cur ,
2016-08-03 01:43:24 +00:00
xfs_agblock_t agbno , xfs_extlen_t len ,
uint64_t owner , uint64_t offset , unsigned int flags ) ,
2024-07-02 18:22:58 +00:00
TP_ARGS ( cur , agbno , len , owner , offset , flags ) ,
2016-08-03 01:43:24 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
__field ( uint64_t , owner )
__field ( uint64_t , offset )
__field ( unsigned int , flags )
) ,
TP_fast_assign (
2024-07-02 18:22:58 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2016-08-03 01:43:24 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
__entry - > owner = owner ;
__entry - > offset = offset ;
__entry - > flags = flags ;
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x " ,
2016-08-03 01:43:24 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len ,
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
) ;
/*
 * Define a tracepoint called "name" in xfs_rmapbt_class.  The prototype
 * repeats the class prototype: cursor, extent start and length, owner,
 * offset and flags.
 */
# define DEFINE_RMAPBT_EVENT(name) \
DEFINE_EVENT ( xfs_rmapbt_class , name , \
2024-07-02 18:22:58 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , \
2016-08-03 01:43:24 +00:00
xfs_agblock_t agbno , xfs_extlen_t len , \
uint64_t owner , uint64_t offset , unsigned int flags ) , \
2024-07-02 18:22:58 +00:00
TP_ARGS ( cur , agbno , len , owner , offset , flags ) )
2016-08-03 01:43:24 +00:00
2024-07-02 18:22:59 +00:00
/*
 * Register the rmap intent type enum values with the tracing core.
 * NOTE(review): presumably these are the values named by
 * XFS_RMAP_INTENT_STRINGS, which xfs_rmap_deferred_class uses to print "op";
 * confirm against the definition of that macro.
 */
TRACE_DEFINE_ENUM ( XFS_RMAP_MAP ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_MAP_SHARED ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_UNMAP ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_UNMAP_SHARED ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_CONVERT ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_CONVERT_SHARED ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_ALLOC ) ;
TRACE_DEFINE_ENUM ( XFS_RMAP_FREE ) ;
2024-02-22 20:43:43 +00:00
/*
 * Event class for deferred rmap intents.
 *
 * The group number and group block number are derived from the intent's
 * mapping start block with XFS_FSB_TO_AGNO() and XFS_FSB_TO_AGBNO().  The
 * owner, fork, file offset, length, extent state and intent type are copied
 * from the intent.  "op" and the fork are printed symbolically.
 */
DECLARE_EVENT_CLASS ( xfs_rmap_deferred_class ,
2024-07-02 18:22:59 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_rmap_intent * ri ) ,
TP_ARGS ( mp , ri ) ,
2024-02-22 20:43:43 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-07-02 18:22:59 +00:00
__field ( unsigned long long , owner )
2024-02-22 20:43:43 +00:00
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( int , whichfork )
__field ( xfs_fileoff_t , l_loff )
__field ( xfs_filblks_t , l_len )
__field ( xfs_exntst_t , l_state )
__field ( int , op )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
2024-07-02 18:22:59 +00:00
/* split the mapping's start block into group number and group block */
__entry - > agno = XFS_FSB_TO_AGNO ( mp , ri - > ri_bmap . br_startblock ) ;
__entry - > agbno = XFS_FSB_TO_AGBNO ( mp ,
ri - > ri_bmap . br_startblock ) ;
__entry - > owner = ri - > ri_owner ;
__entry - > whichfork = ri - > ri_whichfork ;
__entry - > l_loff = ri - > ri_bmap . br_startoff ;
__entry - > l_len = ri - > ri_bmap . br_blockcount ;
__entry - > l_state = ri - > ri_bmap . br_state ;
__entry - > op = ri - > ri_type ;
) ,
TP_printk ( " dev %d:%d op %s agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__print_symbolic ( __entry - > op , XFS_RMAP_INTENT_STRINGS ) ,
2024-02-22 20:43:43 +00:00
__entry - > agno ,
__entry - > agbno ,
2024-07-02 18:22:59 +00:00
__entry - > owner ,
2024-02-22 20:43:43 +00:00
__print_symbolic ( __entry - > whichfork , XFS_WHICHFORK_STRINGS ) ,
__entry - > l_loff ,
__entry - > l_len ,
__entry - > l_state )
) ;
/*
 * Define a tracepoint called "name" in xfs_rmap_deferred_class.  Callers
 * pass the mount and the rmap intent.
 */
# define DEFINE_RMAP_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_rmap_deferred_class , name , \
2024-07-02 18:22:59 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_rmap_intent * ri ) , \
TP_ARGS ( mp , ri ) )
2016-08-03 01:43:24 +00:00
/*
 * Deferred rmap intent events (xfs_rmap_deferred_class), rmap record events
 * (xfs_rmapbt_class), and the matching btree error events
 * (xfs_btree_error_class).
 */
DEFINE_RMAP_DEFERRED_EVENT ( xfs_rmap_defer ) ;
DEFINE_RMAP_DEFERRED_EVENT ( xfs_rmap_deferred ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_update ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_insert ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_delete ) ;
2024-07-02 18:22:57 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_insert_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_delete_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_rmap_update_error ) ;
2016-10-03 16:11:48 +00:00
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_candidate ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_query ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range_candidate ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range ) ;
2016-08-03 01:44:21 +00:00
DEFINE_RMAPBT_EVENT ( xfs_rmap_lookup_le_range_result ) ;
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_right_neighbor_result ) ;
2016-08-03 02:03:19 +00:00
DEFINE_RMAPBT_EVENT ( xfs_rmap_find_left_neighbor_result ) ;
2016-08-03 01:39:05 +00:00
2016-10-03 16:11:28 +00:00
/* deferred bmbt updates */
2024-02-22 20:43:53 +00:00
/*
 * Register the bmap intent type enum values with the tracing core.
 * NOTE(review): presumably the values named by XFS_BMAP_INTENT_STRINGS,
 * which xfs_bmap_deferred_class uses to print "op"; confirm against that
 * macro's definition.
 */
TRACE_DEFINE_ENUM ( XFS_BMAP_MAP ) ;
TRACE_DEFINE_ENUM ( XFS_BMAP_UNMAP ) ;
2024-02-22 20:43:43 +00:00
/*
 * Event class for deferred bmap intents.
 *
 * The mount is reached through the intent's owning inode.  The group type
 * and group number come from the intent's group; the group block number is
 * either the raw start block (rt group on a filesystem without rtgroups) or
 * the result of xfs_fsb_to_gbno().  The inode number, fork, file offset,
 * length, extent state and intent type are copied from the intent.
 */
DECLARE_EVENT_CLASS ( xfs_bmap_deferred_class ,
2024-02-22 20:43:53 +00:00
TP_PROTO ( struct xfs_bmap_intent * bi ) ,
TP_ARGS ( bi ) ,
2024-02-22 20:43:43 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:19:25 +00:00
__field ( enum xfs_group_type , type )
2024-02-22 20:43:43 +00:00
__field ( xfs_agnumber_t , agno )
__field ( xfs_ino_t , ino )
2024-11-04 04:19:25 +00:00
__field ( unsigned long long , gbno )
2024-02-22 20:43:43 +00:00
__field ( int , whichfork )
__field ( xfs_fileoff_t , l_loff )
__field ( xfs_filblks_t , l_len )
__field ( xfs_exntst_t , l_state )
__field ( int , op )
) ,
TP_fast_assign (
2024-02-22 20:43:53 +00:00
struct xfs_inode * ip = bi - > bi_owner ;
2024-11-04 04:19:25 +00:00
struct xfs_mount * mp = ip - > i_mount ;
2024-02-22 20:43:53 +00:00
2024-11-04 04:19:25 +00:00
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > type = bi - > bi_group - > xg_type ;
__entry - > agno = bi - > bi_group - > xg_gno ;
if ( bi - > bi_group - > xg_type = = XG_TYPE_RTG & &
! xfs_has_rtgroups ( mp ) ) {
/*
 * Legacy rt filesystems do not have allocation groups
 * ondisk.  We emulate this incore with one gigantic
 * rtgroup whose size can exceed a 32-bit block number.
 * For this tracepoint, we report group 0 and a 64-bit
 * group block number.
 */
__entry - > gbno = bi - > bi_bmap . br_startblock ;
2024-02-22 20:44:23 +00:00
} else {
2024-11-04 04:19:25 +00:00
/* otherwise convert the start block to a block within the group */
__entry - > gbno = xfs_fsb_to_gbno ( mp ,
bi - > bi_bmap . br_startblock ,
bi - > bi_group - > xg_type ) ;
2024-02-22 20:44:23 +00:00
}
2024-02-22 20:43:53 +00:00
__entry - > ino = ip - > i_ino ;
__entry - > whichfork = bi - > bi_whichfork ;
__entry - > l_loff = bi - > bi_bmap . br_startoff ;
__entry - > l_len = bi - > bi_bmap . br_blockcount ;
__entry - > l_state = bi - > bi_bmap . br_state ;
__entry - > op = bi - > bi_type ;
2024-02-22 20:43:43 +00:00
) ,
2024-11-04 04:19:25 +00:00
/* "%sno" is filled from XG_TYPE_STRINGS, so the label follows the group type */
TP_printk ( " dev %d:%d op %s ino 0x%llx %sno 0x%x gbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d " ,
2024-02-22 20:43:43 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:43:53 +00:00
__print_symbolic ( __entry - > op , XFS_BMAP_INTENT_STRINGS ) ,
__entry - > ino ,
2024-11-04 04:19:25 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
2024-02-22 20:43:43 +00:00
__entry - > agno ,
2024-11-04 04:19:25 +00:00
__entry - > gbno ,
2024-02-22 20:43:43 +00:00
__print_symbolic ( __entry - > whichfork , XFS_WHICHFORK_STRINGS ) ,
__entry - > l_loff ,
__entry - > l_len ,
__entry - > l_state )
) ;
/*
 * Define a tracepoint called "name" in xfs_bmap_deferred_class.  Callers
 * pass only the bmap intent.
 */
# define DEFINE_BMAP_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_bmap_deferred_class , name , \
2024-02-22 20:43:53 +00:00
TP_PROTO ( struct xfs_bmap_intent * bi ) , \
TP_ARGS ( bi ) )
2016-10-03 16:11:28 +00:00
/* Deferred bmap intent events. */
DEFINE_BMAP_DEFERRED_EVENT ( xfs_bmap_defer ) ;
DEFINE_BMAP_DEFERRED_EVENT ( xfs_bmap_deferred ) ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is in contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
/*
 * per-AG reservation
 *
 * Event class recording the state of one reservation type in one AG: the
 * reservation type, the AG's pagf_freeblks and pagf_flcount counters, the
 * ar_reserved and ar_asked values of the reservation returned by
 * xfs_perag_resv(), and a caller-supplied length.  ar_reserved and ar_asked
 * are recorded as 0 when xfs_perag_resv() returns NULL.
 */
DECLARE_EVENT_CLASS ( xfs_ag_resv_class ,
TP_PROTO ( struct xfs_perag * pag , enum xfs_ag_resv_type resv ,
xfs_extlen_t len ) ,
TP_ARGS ( pag , resv , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , resv )
__field ( xfs_extlen_t , freeblks )
__field ( xfs_extlen_t , flcount )
__field ( xfs_extlen_t , reserved )
__field ( xfs_extlen_t , asked )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
struct xfs_ag_resv * r = xfs_perag_resv ( pag , resv ) ;
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
__entry - > resv = resv ;
__entry - > freeblks = pag - > pagf_freeblks ;
__entry - > flcount = pag - > pagf_flcount ;
/* r may be NULL; report zeroes in that case */
__entry - > reserved = r ? r - > ar_reserved : 0 ;
__entry - > asked = r ? r - > ar_asked : 0 ;
__entry - > len = len ;
) ,
2021-08-17 16:24:26 +00:00
/*
 * NOTE(review): the label "resv" appears twice in the output; the first
 * is the reservation type (__entry->resv), the second is the reserved
 * block count (__entry->reserved).
 */
TP_printk ( " dev %d:%d agno 0x%x resv %d freeblks %u flcount %u "
2017-04-14 18:43:27 +00:00
" resv %u ask %u len %u " ,
xfs: set up per-AG free space reservations
One unfortunate quirk of the reference count and reverse mapping
btrees -- they can expand in size when blocks are written to *other*
allocation groups if, say, one large extent becomes a lot of tiny
extents. Since we don't want to start throwing errors in the middle
of CoWing, we need to reserve some blocks to handle future expansion.
The transaction block reservation counters aren't sufficient here
because we have to have a reserve of blocks in every AG, not just
somewhere in the filesystem.
Therefore, create two per-AG block reservation pools. One feeds the
AGFL so that rmapbt expansion always succeeds, and the other feeds all
other metadata so that refcountbt expansion never fails.
Use the count of how many reserved blocks we need to have on hand to
create a virtual reservation in the AG. Through selective clamping of
the maximum length of allocation requests and of the length of the
longest free extent, we can make it look like there's less free space
in the AG unless the reservation owner is asking for blocks.
In other words, play some accounting tricks in-core to make sure that
we always have blocks available. On the plus side, there's nothing to
clean up if we crash, which is contrast to the strategy that the rough
draft used (actually removing extents from the freespace btrees).
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 00:30:52 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > resv ,
__entry - > freeblks ,
__entry - > flcount ,
__entry - > reserved ,
__entry - > asked ,
__entry - > len )
)
/*
 * Define a tracepoint called "name" in xfs_ag_resv_class.  The second
 * parameter is named "type" here and "resv" in the class prototype; both
 * are the enum xfs_ag_resv_type argument.
 */
# define DEFINE_AG_RESV_EVENT(name) \
DEFINE_EVENT ( xfs_ag_resv_class , name , \
TP_PROTO ( struct xfs_perag * pag , enum xfs_ag_resv_type type , \
xfs_extlen_t len ) , \
TP_ARGS ( pag , type , len ) )
/* per-AG reservation tracepoints */
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_init ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_free ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_alloc_extent ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_free_extent ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_critical ) ;
DEFINE_AG_RESV_EVENT ( xfs_ag_resv_needed ) ;
2024-11-04 04:18:32 +00:00
/*
 * Trace an error code for a given AG together with the caller's instruction
 * pointer.  Records the device and AG number obtained from the perag
 * structure.
 */
TRACE_EVENT ( xfs_ag_resv_init_error ,
TP_PROTO ( const struct xfs_perag * pag , int error ,
2024-07-02 18:22:57 +00:00
unsigned long caller_ip ) ,
2024-11-04 04:18:32 +00:00
TP_ARGS ( pag , error , caller_ip ) ,
2024-07-02 18:22:57 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( int , error )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2024-07-02 18:22:57 +00:00
__entry - > error = error ;
__entry - > caller_ip = caller_ip ;
) ,
TP_printk ( " dev %d:%d agno 0x%x error %d caller %pS " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > error ,
( char * ) __entry - > caller_ip )
) ;
2016-10-03 16:11:15 +00:00
/* refcount tracepoint classes */
2024-07-02 18:23:05 +00:00
/*
 * Basic refcount event class: records the device, the group number taken
 * from the btree cursor, and the start block and length of an extent.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_class ,
TP_PROTO ( struct xfs_btree_cur * cur , xfs_agblock_t agbno ,
xfs_extlen_t len ) ,
TP_ARGS ( cur , agbno , len ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2024-07-02 18:23:05 +00:00
__entry - > agbno = agbno ;
__entry - > len = len ;
) ,
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__entry - > len )
) ;
/*
 * Define a tracepoint called "name" in xfs_refcount_class.  Callers pass
 * the btree cursor and the extent's start block and length.
 */
# define DEFINE_REFCOUNT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_class , name , \
TP_PROTO ( struct xfs_btree_cur * cur , xfs_agblock_t agbno , \
xfs_extlen_t len ) , \
TP_ARGS ( cur , agbno , len ) )
2016-10-03 16:11:15 +00:00
2018-12-18 22:32:29 +00:00
/* Register the btree lookup direction enum values with the tracing core. */
TRACE_DEFINE_ENUM ( XFS_LOOKUP_EQi ) ;
TRACE_DEFINE_ENUM ( XFS_LOOKUP_LEi ) ;
TRACE_DEFINE_ENUM ( XFS_LOOKUP_GEi ) ;
2024-07-02 18:23:05 +00:00
/*
 * Trace a refcount btree lookup: records the device, the cursor's group
 * number, the block being looked up, and the lookup direction.  The
 * direction is printed both symbolically (via XFS_AG_BTREE_CMP_FORMAT_STR)
 * and as its raw integer value.
 */
TRACE_EVENT ( xfs_refcount_lookup ,
TP_PROTO ( struct xfs_btree_cur * cur , xfs_agblock_t agbno ,
xfs_lookup_t dir ) ,
TP_ARGS ( cur , agbno , dir ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agblock_t , agbno )
__field ( xfs_lookup_t , dir )
) ,
TP_fast_assign (
2024-07-02 18:23:05 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2016-10-03 16:11:15 +00:00
__entry - > agbno = agbno ;
__entry - > dir = dir ;
) ,
2021-08-17 16:28:53 +00:00
TP_printk ( " dev %d:%d agno 0x%x agbno 0x%x cmp %s(%d) " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agbno ,
__print_symbolic ( __entry - > dir , XFS_AG_BTREE_CMP_FORMAT_STR ) ,
__entry - > dir )
)
/*
 * single-rcext tracepoint class
 *
 * Records one refcount record: its domain (printed symbolically via
 * XFS_REFC_DOMAIN_STRINGS), start block, block count and reference count,
 * along with the device and the cursor's group number.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_extent_class ,
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * irec ) ,
TP_ARGS ( cur , irec ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , startblock )
__field ( xfs_extlen_t , blockcount )
__field ( xfs_nlink_t , refcount )
) ,
TP_fast_assign (
2024-07-02 18:23:06 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2022-10-26 21:23:58 +00:00
__entry - > domain = irec - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > startblock = irec - > rc_startblock ;
__entry - > blockcount = irec - > rc_blockcount ;
__entry - > refcount = irec - > rc_refcount ;
) ,
2022-10-26 21:23:58 +00:00
TP_printk ( " dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > startblock ,
__entry - > blockcount ,
__entry - > refcount )
)
/*
 * Define a tracepoint called "name" in xfs_refcount_extent_class.  Callers
 * pass the btree cursor and one refcount record.
 */
# define DEFINE_REFCOUNT_EXTENT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_extent_class , name , \
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * irec ) , \
TP_ARGS ( cur , irec ) )
2016-10-03 16:11:15 +00:00
/*
 * single-rcext and an agbno tracepoint class
 *
 * Same fields as xfs_refcount_extent_class, plus an extra block number
 * supplied by the caller, printed after the "@" in the output.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_extent_at_class ,
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * irec ,
xfs_agblock_t agbno ) ,
TP_ARGS ( cur , irec , agbno ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , startblock )
__field ( xfs_extlen_t , blockcount )
__field ( xfs_nlink_t , refcount )
__field ( xfs_agblock_t , agbno )
) ,
TP_fast_assign (
2024-07-02 18:23:06 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2022-10-26 21:23:58 +00:00
__entry - > domain = irec - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > startblock = irec - > rc_startblock ;
__entry - > blockcount = irec - > rc_blockcount ;
__entry - > refcount = irec - > rc_refcount ;
__entry - > agbno = agbno ;
) ,
2022-10-26 21:23:58 +00:00
TP_printk ( " dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > startblock ,
__entry - > blockcount ,
__entry - > refcount ,
__entry - > agbno )
)
/*
 * Define a tracepoint called "name" in xfs_refcount_extent_at_class.
 * Callers pass the btree cursor, one refcount record, and a block number.
 */
# define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_extent_at_class , name , \
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * irec , \
xfs_agblock_t agbno ) , \
TP_ARGS ( cur , irec , agbno ) )
2016-10-03 16:11:15 +00:00
/*
 * double-rcext tracepoint class
 *
 * Records two refcount records (i1 and i2) side by side: the domain, start
 * block, block count and reference count of each, along with the device and
 * the cursor's group number.  The two records are separated by "--" in the
 * output.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_double_extent_class ,
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 ,
struct xfs_refcount_irec * i2 ) ,
TP_ARGS ( cur , i1 , i2 ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i1_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i1_startblock )
__field ( xfs_extlen_t , i1_blockcount )
__field ( xfs_nlink_t , i1_refcount )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i2_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i2_startblock )
__field ( xfs_extlen_t , i2_blockcount )
__field ( xfs_nlink_t , i2_refcount )
) ,
TP_fast_assign (
2024-07-02 18:23:06 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2022-10-26 21:23:58 +00:00
__entry - > i1_domain = i1 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock = i1 - > rc_startblock ;
__entry - > i1_blockcount = i1 - > rc_blockcount ;
__entry - > i1_refcount = i1 - > rc_refcount ;
2022-10-26 21:23:58 +00:00
__entry - > i2_domain = i2 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock = i2 - > rc_startblock ;
__entry - > i2_blockcount = i2 - > rc_blockcount ;
__entry - > i2_refcount = i2 - > rc_refcount ;
) ,
2022-10-26 21:23:58 +00:00
TP_printk ( " dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
" dom %s agbno 0x%x fsbcount 0x%x refcount %u " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i1_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock ,
__entry - > i1_blockcount ,
__entry - > i1_refcount ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i2_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock ,
__entry - > i2_blockcount ,
__entry - > i2_refcount )
)
# define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_double_extent_class , name , \
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 , \
struct xfs_refcount_irec * i2 ) , \
TP_ARGS ( cur , i1 , i2 ) )
2016-10-03 16:11:15 +00:00
/*
 * double-rcext and an agbno tracepoint class
 *
 * Same payload as the double-rcext class (two refcount records @i1 and
 * @i2), with an additional caller-supplied @agbno recorded at the end.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_double_extent_at_class ,
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 ,
struct xfs_refcount_irec * i2 , xfs_agblock_t agbno ) ,
TP_ARGS ( cur , i1 , i2 , agbno ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i1_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i1_startblock )
__field ( xfs_extlen_t , i1_blockcount )
__field ( xfs_nlink_t , i1_refcount )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i2_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i2_startblock )
__field ( xfs_extlen_t , i2_blockcount )
__field ( xfs_nlink_t , i2_refcount )
__field ( xfs_agblock_t , agbno )
) ,
TP_fast_assign (
2024-07-02 18:23:06 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2022-10-26 21:23:58 +00:00
__entry - > i1_domain = i1 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock = i1 - > rc_startblock ;
__entry - > i1_blockcount = i1 - > rc_blockcount ;
__entry - > i1_refcount = i1 - > rc_refcount ;
2022-10-26 21:23:58 +00:00
__entry - > i2_domain = i2 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock = i2 - > rc_startblock ;
__entry - > i2_blockcount = i2 - > rc_blockcount ;
__entry - > i2_refcount = i2 - > rc_refcount ;
__entry - > agbno = agbno ;
) ,
/*
 * The two records are separated by "--"; the caller-supplied block
 * follows the "@" marker.
 */
2022-10-26 21:23:58 +00:00
TP_printk ( " dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
" dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i1_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock ,
__entry - > i1_blockcount ,
__entry - > i1_refcount ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i2_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock ,
__entry - > i2_blockcount ,
__entry - > i2_refcount ,
__entry - > agbno )
)
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_double_extent_at_class , name , \
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 , \
struct xfs_refcount_irec * i2 , xfs_agblock_t agbno ) , \
TP_ARGS ( cur , i1 , i2 , agbno ) )
2016-10-03 16:11:15 +00:00
/*
 * triple-rcext tracepoint class
 *
 * Captures three refcount records, @i1, @i2 and @i3 (domain, startblock,
 * blockcount and refcount of each), plus the device and group number
 * taken from the btree cursor @cur.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_triple_extent_class ,
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 ,
struct xfs_refcount_irec * i2 , struct xfs_refcount_irec * i3 ) ,
TP_ARGS ( cur , i1 , i2 , i3 ) ,
2016-10-03 16:11:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i1_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i1_startblock )
__field ( xfs_extlen_t , i1_blockcount )
__field ( xfs_nlink_t , i1_refcount )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i2_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i2_startblock )
__field ( xfs_extlen_t , i2_blockcount )
__field ( xfs_nlink_t , i2_refcount )
2022-10-26 21:23:58 +00:00
__field ( enum xfs_refc_domain , i3_domain )
2016-10-03 16:11:15 +00:00
__field ( xfs_agblock_t , i3_startblock )
__field ( xfs_extlen_t , i3_blockcount )
__field ( xfs_nlink_t , i3_refcount )
) ,
TP_fast_assign (
2024-07-02 18:23:06 +00:00
__entry - > dev = cur - > bc_mp - > m_super - > s_dev ;
2024-11-04 04:18:44 +00:00
__entry - > agno = cur - > bc_group - > xg_gno ;
2022-10-26 21:23:58 +00:00
__entry - > i1_domain = i1 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock = i1 - > rc_startblock ;
__entry - > i1_blockcount = i1 - > rc_blockcount ;
__entry - > i1_refcount = i1 - > rc_refcount ;
2022-10-26 21:23:58 +00:00
__entry - > i2_domain = i2 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock = i2 - > rc_startblock ;
__entry - > i2_blockcount = i2 - > rc_blockcount ;
__entry - > i2_refcount = i2 - > rc_refcount ;
2022-10-26 21:23:58 +00:00
__entry - > i3_domain = i3 - > rc_domain ;
2016-10-03 16:11:15 +00:00
__entry - > i3_startblock = i3 - > rc_startblock ;
__entry - > i3_blockcount = i3 - > rc_blockcount ;
__entry - > i3_refcount = i3 - > rc_refcount ;
) ,
/* The three records are printed in order, separated by "--". */
2022-10-26 21:23:58 +00:00
TP_printk ( " dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
" dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
" dom %s agbno 0x%x fsbcount 0x%x refcount %u " ,
2016-10-03 16:11:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i1_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i1_startblock ,
__entry - > i1_blockcount ,
__entry - > i1_refcount ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i2_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i2_startblock ,
__entry - > i2_blockcount ,
__entry - > i2_refcount ,
2022-10-26 21:23:58 +00:00
__print_symbolic ( __entry - > i3_domain , XFS_REFC_DOMAIN_STRINGS ) ,
2016-10-03 16:11:15 +00:00
__entry - > i3_startblock ,
__entry - > i3_blockcount ,
__entry - > i3_refcount )
) ;
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_triple_extent_class , name , \
2024-07-02 18:23:06 +00:00
TP_PROTO ( struct xfs_btree_cur * cur , struct xfs_refcount_irec * i1 , \
struct xfs_refcount_irec * i2 , struct xfs_refcount_irec * i3 ) , \
TP_ARGS ( cur , i1 , i2 , i3 ) )
2016-10-03 16:11:15 +00:00
/*
 * refcount btree tracepoints
 *
 * NOTE(review): DEFINE_REFCOUNT_EXTENT_EVENT, DEFINE_REFCOUNT_EVENT and
 * DEFINE_BTREE_ERROR_EVENT are defined earlier in this file, outside the
 * section reviewed here; see their classes for the recorded fields.
 */
DEFINE_REFCOUNT_EXTENT_EVENT ( xfs_refcount_get ) ;
DEFINE_REFCOUNT_EXTENT_EVENT ( xfs_refcount_update ) ;
DEFINE_REFCOUNT_EXTENT_EVENT ( xfs_refcount_insert ) ;
DEFINE_REFCOUNT_EXTENT_EVENT ( xfs_refcount_delete ) ;
2024-07-02 18:23:05 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_insert_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_delete_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_update_error ) ;
2016-10-03 16:11:15 +00:00
/*
 * refcount adjustment tracepoints
 *
 * The merge/split/find events below use the single-, double- and
 * triple-rcext classes (with and without the extra agbno) declared above.
 */
2024-07-02 18:23:05 +00:00
DEFINE_REFCOUNT_EVENT ( xfs_refcount_increase ) ;
DEFINE_REFCOUNT_EVENT ( xfs_refcount_decrease ) ;
DEFINE_REFCOUNT_EVENT ( xfs_refcount_cow_increase ) ;
DEFINE_REFCOUNT_EVENT ( xfs_refcount_cow_decrease ) ;
2016-10-03 16:11:15 +00:00
DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT ( xfs_refcount_merge_center_extents ) ;
DEFINE_REFCOUNT_EXTENT_EVENT ( xfs_refcount_modify_extent ) ;
DEFINE_REFCOUNT_EXTENT_AT_EVENT ( xfs_refcount_split_extent ) ;
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT ( xfs_refcount_merge_left_extent ) ;
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT ( xfs_refcount_merge_right_extent ) ;
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT ( xfs_refcount_find_left_extent ) ;
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT ( xfs_refcount_find_right_extent ) ;
2024-07-02 18:23:05 +00:00
/* One *_error event per adjustment operation traced above. */
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_adjust_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_adjust_cow_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_merge_center_extents_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_modify_extent_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_split_extent_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_merge_left_extent_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_merge_right_extent_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_find_left_extent_error ) ;
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_find_right_extent_error ) ;
2016-10-03 16:11:15 +00:00
/* reflink helpers */
2024-07-02 18:23:05 +00:00
DEFINE_REFCOUNT_EVENT ( xfs_refcount_find_shared ) ;
DEFINE_REFCOUNT_EVENT ( xfs_refcount_find_shared_result ) ;
2024-07-02 18:23:05 +00:00
DEFINE_BTREE_ERROR_EVENT ( xfs_refcount_find_shared_error ) ;
2024-02-22 20:43:43 +00:00
2024-07-02 18:23:07 +00:00
/*
 * Make the refcount intent type values known to the tracing core; the
 * "op" field of the class below is decoded with
 * XFS_REFCOUNT_INTENT_STRINGS.
 */
TRACE_DEFINE_ENUM ( XFS_REFCOUNT_INCREASE ) ;
TRACE_DEFINE_ENUM ( XFS_REFCOUNT_DECREASE ) ;
TRACE_DEFINE_ENUM ( XFS_REFCOUNT_ALLOC_COW ) ;
TRACE_DEFINE_ENUM ( XFS_REFCOUNT_FREE_COW ) ;
2024-02-22 20:43:43 +00:00
/*
 * Deferred refcount intent tracepoint class.
 *
 * Records the intent type (ri_type) and the extent it covers.  The
 * filesystem block ri_startblock is split into group number and per-AG
 * block number with XFS_FSB_TO_AGNO() / XFS_FSB_TO_AGBNO(); the length
 * is ri_blockcount.
 */
DECLARE_EVENT_CLASS ( xfs_refcount_deferred_class ,
2024-07-02 18:23:07 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_refcount_intent * refc ) ,
TP_ARGS ( mp , refc ) ,
2024-02-22 20:43:43 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
2024-07-02 18:23:07 +00:00
__field ( int , op )
2024-02-22 20:43:43 +00:00
__field ( xfs_agblock_t , agbno )
__field ( xfs_extlen_t , len )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
2024-07-02 18:23:07 +00:00
__entry - > agno = XFS_FSB_TO_AGNO ( mp , refc - > ri_startblock ) ;
__entry - > op = refc - > ri_type ;
__entry - > agbno = XFS_FSB_TO_AGBNO ( mp , refc - > ri_startblock ) ;
__entry - > len = refc - > ri_blockcount ;
2024-02-22 20:43:43 +00:00
) ,
2024-07-02 18:23:07 +00:00
TP_printk ( " dev %d:%d op %s agno 0x%x agbno 0x%x fsbcount 0x%x " ,
2024-02-22 20:43:43 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-07-02 18:23:07 +00:00
__print_symbolic ( __entry - > op , XFS_REFCOUNT_INTENT_STRINGS ) ,
2024-02-22 20:43:43 +00:00
__entry - > agno ,
__entry - > agbno ,
__entry - > len )
) ;
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_REFCOUNT_DEFERRED_EVENT(name) \
DEFINE_EVENT ( xfs_refcount_deferred_class , name , \
2024-07-02 18:23:07 +00:00
TP_PROTO ( struct xfs_mount * mp , struct xfs_refcount_intent * refc ) , \
TP_ARGS ( mp , refc ) )
2016-10-03 16:11:22 +00:00
DEFINE_REFCOUNT_DEFERRED_EVENT ( xfs_refcount_defer ) ;
DEFINE_REFCOUNT_DEFERRED_EVENT ( xfs_refcount_deferred ) ;
2024-07-02 18:23:07 +00:00
DEFINE_REFCOUNT_DEFERRED_EVENT ( xfs_refcount_finish_one_leftover ) ;
2016-10-03 16:11:21 +00:00
2016-10-03 16:11:27 +00:00
/*
 * simple inode-based error/%ip tracepoint class
 *
 * Records the inode number of @ip, the device of its superblock, the
 * @error code and the caller's instruction pointer @caller_ip (printed
 * symbolically with %pS).
 */
DECLARE_EVENT_CLASS ( xfs_inode_error_class ,
TP_PROTO ( struct xfs_inode * ip , int error , unsigned long caller_ip ) ,
TP_ARGS ( ip , error , caller_ip ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( int , error )
__field ( unsigned long , caller_ip )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
__entry - > error = error ;
__entry - > caller_ip = caller_ip ;
) ,
2021-08-17 16:20:27 +00:00
TP_printk ( " dev %d:%d ino 0x%llx error %d caller %pS " ,
2016-10-03 16:11:27 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > error ,
( char * ) __entry - > caller_ip )
) ;
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_INODE_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_inode_error_class , name , \
TP_PROTO ( struct xfs_inode * ip , int error , \
unsigned long caller_ip ) , \
TP_ARGS ( ip , error , caller_ip ) )
2016-10-03 16:11:30 +00:00
/* reflink tracepoint classes */
/*
 * two-file io tracepoint class
 *
 * Records a byte range operation between two inodes: the byte count @len
 * and, for each of @src and @dest, the inode number, the in-core size
 * (VFS i_size), the on-disk size (i_disk_size) and the byte offset.
 * The device is taken from @src's superblock only.
 */
DECLARE_EVENT_CLASS ( xfs_double_io_class ,
TP_PROTO ( struct xfs_inode * src , xfs_off_t soffset , xfs_off_t len ,
struct xfs_inode * dest , xfs_off_t doffset ) ,
TP_ARGS ( src , soffset , len , dest , doffset ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , src_ino )
__field ( loff_t , src_isize )
__field ( loff_t , src_disize )
__field ( loff_t , src_offset )
2021-08-17 20:00:13 +00:00
__field ( long long , len )
2016-10-03 16:11:30 +00:00
__field ( xfs_ino_t , dest_ino )
__field ( loff_t , dest_isize )
__field ( loff_t , dest_disize )
__field ( loff_t , dest_offset )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( src ) - > i_sb - > s_dev ;
__entry - > src_ino = src - > i_ino ;
__entry - > src_isize = VFS_I ( src ) - > i_size ;
2021-03-29 18:11:40 +00:00
__entry - > src_disize = src - > i_disk_size ;
2016-10-03 16:11:30 +00:00
__entry - > src_offset = soffset ;
__entry - > len = len ;
__entry - > dest_ino = dest - > i_ino ;
__entry - > dest_isize = VFS_I ( dest ) - > i_size ;
2021-03-29 18:11:40 +00:00
__entry - > dest_disize = dest - > i_disk_size ;
2016-10-03 16:11:30 +00:00
__entry - > dest_offset = doffset ;
) ,
/* The byte count is printed first, then the source, then the destination. */
2021-08-17 20:00:13 +00:00
TP_printk ( " dev %d:%d bytecount 0x%llx "
2021-08-17 17:09:12 +00:00
" ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> "
" ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx " ,
2016-10-03 16:11:30 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > len ,
__entry - > src_ino ,
__entry - > src_isize ,
__entry - > src_disize ,
__entry - > src_offset ,
__entry - > dest_ino ,
__entry - > dest_isize ,
__entry - > dest_disize ,
__entry - > dest_offset )
)
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_DOUBLE_IO_EVENT(name) \
DEFINE_EVENT ( xfs_double_io_class , name , \
TP_PROTO ( struct xfs_inode * src , xfs_off_t soffset , xfs_off_t len , \
struct xfs_inode * dest , xfs_off_t doffset ) , \
TP_ARGS ( src , soffset , len , dest , doffset ) )
/*
 * inode/irec events
 *
 * Records one block mapping @irec of inode @ip: file offset
 * (br_startoff), length (br_blockcount), start block (br_startblock)
 * and extent state (br_state, printed as a plain integer after "st").
 */
DECLARE_EVENT_CLASS ( xfs_inode_irec_class ,
TP_PROTO ( struct xfs_inode * ip , struct xfs_bmbt_irec * irec ) ,
TP_ARGS ( ip , irec ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ino )
__field ( xfs_fileoff_t , lblk )
__field ( xfs_extlen_t , len )
__field ( xfs_fsblock_t , pblk )
2017-02-02 23:14:02 +00:00
__field ( int , state )
2016-10-03 16:11:30 +00:00
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > ino = ip - > i_ino ;
__entry - > lblk = irec - > br_startoff ;
__entry - > len = irec - > br_blockcount ;
__entry - > pblk = irec - > br_startblock ;
2017-02-02 23:14:02 +00:00
__entry - > state = irec - > br_state ;
2016-10-03 16:11:30 +00:00
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d ino 0x%llx fileoff 0x%llx fsbcount 0x%x startblock 0x%llx st %d " ,
2016-10-03 16:11:30 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__entry - > lblk ,
__entry - > len ,
2017-02-02 23:14:02 +00:00
__entry - > pblk ,
__entry - > state )
2016-10-03 16:11:30 +00:00
) ;
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_INODE_IREC_EVENT(name) \
DEFINE_EVENT ( xfs_inode_irec_class , name , \
TP_PROTO ( struct xfs_inode * ip , struct xfs_bmbt_irec * irec ) , \
TP_ARGS ( ip , irec ) )
2022-11-29 01:24:35 +00:00
/*
 * inode iomap invalidation events
 *
 * Records the iomap under inspection (addr, offset, length, type, flags)
 * for inode @ip, the caller-supplied sequence number @wpcseq, and the
 * current if_seq of the fork selected by @whichfork, sampled with
 * READ_ONCE().
 */
DECLARE_EVENT_CLASS(xfs_wb_invalid_class,
	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap,
		 unsigned int wpcseq, int whichfork),
	TP_ARGS(ip, iomap, wpcseq, whichfork),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u64, addr)
		__field(loff_t, pos)
		__field(u64, len)
		__field(u16, type)
		__field(u16, flags)
		__field(u32, wpcseq)
		__field(u32, forkseq)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->addr = iomap->addr;
		__entry->pos = iomap->offset;
		__entry->len = iomap->length;
		__entry->type = iomap->type;
		__entry->flags = iomap->flags;
		__entry->wpcseq = wpcseq;
		__entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq);
	),
	TP_printk(" dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->pos,
		  __entry->addr,
		  __entry->len,
		  __entry->type,
		  __entry->flags,
		  __entry->wpcseq,
		  __entry->forkseq)
);
/* Instantiate an event of the class above; the prototype mirrors the class. */
#define DEFINE_WB_INVALID_EVENT(name) \
DEFINE_EVENT(xfs_wb_invalid_class, name, \
	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, \
		 unsigned int wpcseq, int whichfork), \
	TP_ARGS(ip, iomap, wpcseq, whichfork))
DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid);
DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid);
2022-11-29 01:24:36 +00:00
/*
 * iomap validity tracepoint class.
 *
 * Records the iomap (addr, offset, length, type, flags) for inode @ip
 * alongside the validity_cookie stored in the iomap and the value
 * returned by xfs_iomap_inode_sequence() for the same inode and flags.
 */
DECLARE_EVENT_CLASS(xfs_iomap_invalid_class,
	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap),
	TP_ARGS(ip, iomap),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(u64, addr)
		__field(loff_t, pos)
		__field(u64, len)
		__field(u64, validity_cookie)
		__field(u64, inodeseq)
		__field(u16, type)
		__field(u16, flags)
	),
	TP_fast_assign(
		/* Assignments follow the field declaration order above. */
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->addr = iomap->addr;
		__entry->pos = iomap->offset;
		__entry->len = iomap->length;
		__entry->validity_cookie = iomap->validity_cookie;
		__entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags);
		__entry->type = iomap->type;
		__entry->flags = iomap->flags;
	),
	TP_printk(" dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->pos,
		  __entry->addr,
		  __entry->len,
		  __entry->type,
		  __entry->flags,
		  __entry->validity_cookie,
		  __entry->inodeseq)
);
/* Instantiate an event of the class above; the prototype mirrors the class. */
#define DEFINE_IOMAP_INVALID_EVENT(name) \
DEFINE_EVENT(xfs_iomap_invalid_class, name, \
	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \
	TP_ARGS(ip, iomap))
DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid);
2016-10-03 16:11:30 +00:00
/* refcount/reflink tracepoint definitions */
/*
 * reflink tracepoints
 *
 * NOTE(review): DEFINE_INODE_EVENT and DEFINE_ITRUNC_EVENT are defined
 * earlier in this file, outside the section reviewed here.
 */
DEFINE_INODE_EVENT ( xfs_reflink_set_inode_flag ) ;
DEFINE_INODE_EVENT ( xfs_reflink_unset_inode_flag ) ;
DEFINE_ITRUNC_EVENT ( xfs_reflink_update_inode_size ) ;
2020-06-29 21:47:18 +00:00
/*
 * Block-granularity remap between two inodes: records the block count
 * @len and, for each of @src and @dest, the inode number and the file
 * offset (@soffset / @doffset).  The device is taken from @src's
 * superblock only.
 */
TRACE_EVENT ( xfs_reflink_remap_blocks ,
2016-10-03 16:11:30 +00:00
TP_PROTO ( struct xfs_inode * src , xfs_fileoff_t soffset ,
xfs_filblks_t len , struct xfs_inode * dest ,
xfs_fileoff_t doffset ) ,
TP_ARGS ( src , soffset , len , dest , doffset ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , src_ino )
__field ( xfs_fileoff_t , src_lblk )
__field ( xfs_filblks_t , len )
__field ( xfs_ino_t , dest_ino )
__field ( xfs_fileoff_t , dest_lblk )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( src ) - > i_sb - > s_dev ;
__entry - > src_ino = src - > i_ino ;
__entry - > src_lblk = soffset ;
__entry - > len = len ;
__entry - > dest_ino = dest - > i_ino ;
__entry - > dest_lblk = doffset ;
) ,
/* The block count is printed first, then source -> destination. */
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d fsbcount 0x%llx "
2021-08-17 17:09:12 +00:00
" ino 0x%llx fileoff 0x%llx -> ino 0x%llx fileoff 0x%llx " ,
2016-10-03 16:11:30 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > len ,
__entry - > src_ino ,
__entry - > src_lblk ,
__entry - > dest_ino ,
__entry - > dest_lblk )
) ;
/*
 * Remap range/extent events: the byte-range event uses the two-file io
 * class, the *_error events use the inode error class, and the per-extent
 * source/destination events use the inode/irec class.
 */
DEFINE_DOUBLE_IO_EVENT ( xfs_reflink_remap_range ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_remap_range_error ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_set_inode_flag_error ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_update_inode_size_error ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_remap_blocks_error ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_remap_extent_error ) ;
2020-06-29 21:47:18 +00:00
DEFINE_INODE_IREC_EVENT ( xfs_reflink_remap_extent_src ) ;
DEFINE_INODE_IREC_EVENT ( xfs_reflink_remap_extent_dest ) ;
2016-10-03 16:11:30 +00:00
/* dedupe tracepoints */
DEFINE_DOUBLE_IO_EVENT ( xfs_reflink_compare_extents ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_compare_extents_error ) ;
/*
 * ioctl tracepoints
 *
 * xfs_ioctl_clone takes VFS inodes rather than XFS inodes: it records
 * i_ino (stored as unsigned long) and i_size_read() for both @src and
 * @dest.  The device is taken from @src's superblock only.
 */
TRACE_EVENT ( xfs_ioctl_clone ,
TP_PROTO ( struct inode * src , struct inode * dest ) ,
TP_ARGS ( src , dest ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( unsigned long , src_ino )
__field ( loff_t , src_isize )
__field ( unsigned long , dest_ino )
__field ( loff_t , dest_isize )
) ,
TP_fast_assign (
__entry - > dev = src - > i_sb - > s_dev ;
__entry - > src_ino = src - > i_ino ;
__entry - > src_isize = i_size_read ( src ) ;
__entry - > dest_ino = dest - > i_ino ;
__entry - > dest_isize = i_size_read ( dest ) ;
) ,
2021-08-17 20:03:19 +00:00
TP_printk ( " dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx " ,
2016-10-03 16:11:30 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > src_ino ,
__entry - > src_isize ,
__entry - > dest_ino ,
__entry - > dest_isize )
) ;
/*
 * unshare tracepoints
 *
 * NOTE(review): DEFINE_SIMPLE_IO_EVENT is defined earlier in this file,
 * outside the section reviewed here.
 */
DEFINE_SIMPLE_IO_EVENT ( xfs_reflink_unshare ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_unshare_error ) ;
/* copy on write: per-mapping events use the inode/irec class */
DEFINE_INODE_IREC_EVENT ( xfs_reflink_trim_around_shared ) ;
2016-10-03 16:11:32 +00:00
DEFINE_INODE_IREC_EVENT ( xfs_reflink_cow_found ) ;
DEFINE_INODE_IREC_EVENT ( xfs_reflink_cow_enospc ) ;
2017-02-02 23:14:02 +00:00
DEFINE_INODE_IREC_EVENT ( xfs_reflink_convert_cow ) ;
2016-10-03 16:11:30 +00:00
DEFINE_SIMPLE_IO_EVENT ( xfs_reflink_cancel_cow_range ) ;
DEFINE_SIMPLE_IO_EVENT ( xfs_reflink_end_cow ) ;
2022-04-26 01:38:15 +00:00
DEFINE_INODE_IREC_EVENT ( xfs_reflink_cow_remap_from ) ;
DEFINE_INODE_IREC_EVENT ( xfs_reflink_cow_remap_to ) ;
2016-10-03 16:11:30 +00:00
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_cancel_cow_range_error ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_reflink_end_cow_error ) ;
DEFINE_INODE_IREC_EVENT ( xfs_reflink_cancel_cow ) ;
2016-10-03 16:11:53 +00:00
/* rmap swapext tracepoints */
DEFINE_INODE_IREC_EVENT ( xfs_swap_extent_rmap_remap ) ;
DEFINE_INODE_IREC_EVENT ( xfs_swap_extent_rmap_remap_piece ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_swap_extent_rmap_error ) ;
2017-03-28 21:56:37 +00:00
/* fsmap traces */
2024-11-04 04:19:03 +00:00
/*
 * Records one fsmap record @frec together with the group number @agno
 * supplied by the caller.  @keydev is run through new_decode_dev() before
 * being stored, so it can be printed as major:minor like the mount's own
 * device.  frec->rec_key is stored in the "agbno" field and printed under
 * the "rmapbno" label.
 */
TRACE_EVENT(xfs_fsmap_mapping,
	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
		 const struct xfs_fsmap_irec *frec),
	TP_ARGS(mp, keydev, agno, frec),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(dev_t, keydev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(xfs_daddr_t, start_daddr)
		__field(xfs_daddr_t, len_daddr)
		__field(uint64_t, owner)
		__field(uint64_t, offset)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->keydev = new_decode_dev(keydev);
		__entry->agno = agno;
		__entry->agbno = frec->rec_key;
		__entry->start_daddr = frec->start_daddr;
		__entry->len_daddr = frec->len_daddr;
		__entry->owner = frec->owner;
		__entry->offset = frec->offset;
		__entry->flags = frec->rm_flags;
	),
	TP_printk(" dev %d:%d keydev %d:%d agno 0x%x rmapbno 0x%x start_daddr 0x%llx len_daddr 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
		  __entry->agno,
		  __entry->agbno,
		  __entry->start_daddr,
		  __entry->len_daddr,
		  __entry->owner,
		  __entry->offset,
		  __entry->flags)
);
/*
 * Per-group fsmap query key tracepoint class.
 *
 * Records the caller-supplied group number @agno and the key fields of
 * @rmap (rm_startblock, rm_owner, rm_offset, rm_flags).  @keydev is run
 * through new_decode_dev() before being stored.
 *
 * NOTE(review): the value stored in the "agbno" field (xfs_agblock_t) is
 * printed under the "startblock" label, while the conventions at the top
 * of this file reserve "startblock" for file mapping block numbers and
 * "agbno" for per-AG block numbers -- confirm whether the label should
 * be changed.
 */
DECLARE_EVENT_CLASS ( xfs_fsmap_group_key_class ,
2017-03-28 21:56:37 +00:00
TP_PROTO ( struct xfs_mount * mp , u32 keydev , xfs_agnumber_t agno ,
2021-08-11 00:02:16 +00:00
const struct xfs_rmap_irec * rmap ) ,
2017-03-28 21:56:37 +00:00
TP_ARGS ( mp , keydev , agno , rmap ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( dev_t , keydev )
__field ( xfs_agnumber_t , agno )
2024-11-04 04:19:03 +00:00
__field ( xfs_agblock_t , agbno )
2017-06-16 18:00:05 +00:00
__field ( uint64_t , owner )
__field ( uint64_t , offset )
2017-03-28 21:56:37 +00:00
__field ( unsigned int , flags )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > keydev = new_decode_dev ( keydev ) ;
__entry - > agno = agno ;
2024-11-04 04:19:03 +00:00
__entry - > agbno = rmap - > rm_startblock ;
2017-03-28 21:56:37 +00:00
__entry - > owner = rmap - > rm_owner ;
__entry - > offset = rmap - > rm_offset ;
__entry - > flags = rmap - > rm_flags ;
) ,
2024-11-04 04:19:03 +00:00
TP_printk ( " dev %d:%d keydev %d:%d agno 0x%x startblock 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x " ,
2017-03-28 21:56:37 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
MAJOR ( __entry - > keydev ) , MINOR ( __entry - > keydev ) ,
__entry - > agno ,
2024-11-04 04:19:03 +00:00
__entry - > agbno ,
2017-03-28 21:56:37 +00:00
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
)
2024-11-04 04:19:03 +00:00
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_FSMAP_GROUP_KEY_EVENT(name) \
DEFINE_EVENT ( xfs_fsmap_group_key_class , name , \
2017-03-28 21:56:37 +00:00
TP_PROTO ( struct xfs_mount * mp , u32 keydev , xfs_agnumber_t agno , \
2021-08-11 00:02:16 +00:00
const struct xfs_rmap_irec * rmap ) , \
2017-03-28 21:56:37 +00:00
TP_ARGS ( mp , keydev , agno , rmap ) )
2024-11-04 04:19:03 +00:00
DEFINE_FSMAP_GROUP_KEY_EVENT ( xfs_fsmap_low_group_key ) ;
DEFINE_FSMAP_GROUP_KEY_EVENT ( xfs_fsmap_high_group_key ) ;
2017-03-28 21:56:37 +00:00
2024-11-04 04:19:03 +00:00
/*
 * Linear (non-grouped) fsmap query key tracepoint class.
 *
 * Records the mount's device, the key device (@keydev run through
 * new_decode_dev()) and a single block number @bno.
 */
DECLARE_EVENT_CLASS ( xfs_fsmap_linear_key_class ,
TP_PROTO ( struct xfs_mount * mp , u32 keydev , xfs_fsblock_t bno ) ,
2023-06-30 00:39:44 +00:00
TP_ARGS ( mp , keydev , bno ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( dev_t , keydev )
__field ( xfs_fsblock_t , bno )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > keydev = new_decode_dev ( keydev ) ;
__entry - > bno = bno ;
) ,
TP_printk ( " dev %d:%d keydev %d:%d bno 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
MAJOR ( __entry - > keydev ) , MINOR ( __entry - > keydev ) ,
__entry - > bno )
)
2024-11-04 04:19:03 +00:00
/*
 * Instantiate an event of the class above.  The prototype must mirror the
 * class prototype, so @bno is declared as xfs_fsblock_t here as well.
 */
# define DEFINE_FSMAP_LINEAR_KEY_EVENT(name) \
DEFINE_EVENT ( xfs_fsmap_linear_key_class , name , \
2023-06-30 00:39:44 +00:00
TP_PROTO ( struct xfs_mount * mp , u32 keydev , xfs_fsblock_t bno ) , \
TP_ARGS ( mp , keydev , bno ) )
2024-11-04 04:19:03 +00:00
DEFINE_FSMAP_LINEAR_KEY_EVENT ( xfs_fsmap_low_linear_key ) ;
DEFINE_FSMAP_LINEAR_KEY_EVENT ( xfs_fsmap_high_linear_key ) ;
2023-06-30 00:39:44 +00:00
2017-03-28 21:56:37 +00:00
/*
 * GETFSMAP record tracepoint class.
 *
 * Records the fields of one struct xfs_fsmap: fmr_device (run through
 * new_decode_dev()), fmr_physical, fmr_length, fmr_owner, fmr_offset
 * and fmr_flags.  fmr_physical and fmr_length are stored as xfs_daddr_t
 * and printed under the "daddr" / "bbcount" labels.
 */
DECLARE_EVENT_CLASS ( xfs_getfsmap_class ,
TP_PROTO ( struct xfs_mount * mp , struct xfs_fsmap * fsmap ) ,
TP_ARGS ( mp , fsmap ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( dev_t , keydev )
__field ( xfs_daddr_t , block )
__field ( xfs_daddr_t , len )
2017-06-16 18:00:05 +00:00
__field ( uint64_t , owner )
__field ( uint64_t , offset )
__field ( uint64_t , flags )
2017-03-28 21:56:37 +00:00
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > keydev = new_decode_dev ( fsmap - > fmr_device ) ;
__entry - > block = fsmap - > fmr_physical ;
__entry - > len = fsmap - > fmr_length ;
__entry - > owner = fsmap - > fmr_owner ;
__entry - > offset = fsmap - > fmr_offset ;
__entry - > flags = fsmap - > fmr_flags ;
) ,
2021-08-17 19:45:59 +00:00
TP_printk ( " dev %d:%d keydev %d:%d daddr 0x%llx bbcount 0x%llx owner 0x%llx fileoff_daddr 0x%llx flags 0x%llx " ,
2017-03-28 21:56:37 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
MAJOR ( __entry - > keydev ) , MINOR ( __entry - > keydev ) ,
__entry - > block ,
__entry - > len ,
__entry - > owner ,
__entry - > offset ,
__entry - > flags )
)
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_GETFSMAP_EVENT(name) \
DEFINE_EVENT ( xfs_getfsmap_class , name , \
TP_PROTO ( struct xfs_mount * mp , struct xfs_fsmap * fsmap ) , \
TP_ARGS ( mp , fsmap ) )
DEFINE_GETFSMAP_EVENT ( xfs_getfsmap_low_key ) ;
DEFINE_GETFSMAP_EVENT ( xfs_getfsmap_high_key ) ;
DEFINE_GETFSMAP_EVENT ( xfs_getfsmap_mapping ) ;
2022-04-26 01:38:13 +00:00
/*
 * Transaction reservation tracepoint class.
 *
 * Records the caller-supplied reservation @type and the tr_logres,
 * tr_logcount and tr_logflags members of @res.  @type is passed as
 * unsigned int but stored and printed as a signed int.
 */
DECLARE_EVENT_CLASS ( xfs_trans_resv_class ,
2018-01-08 18:51:26 +00:00
TP_PROTO ( struct xfs_mount * mp , unsigned int type ,
struct xfs_trans_res * res ) ,
TP_ARGS ( mp , type , res ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , type )
__field ( uint , logres )
__field ( int , logcount )
__field ( int , logflags )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > type = type ;
__entry - > logres = res - > tr_logres ;
__entry - > logcount = res - > tr_logcount ;
__entry - > logflags = res - > tr_logflags ;
) ,
TP_printk ( " dev %d:%d type %d logres %u logcount %d flags 0x%x " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > type ,
__entry - > logres ,
__entry - > logcount ,
__entry - > logflags )
2022-04-26 01:38:13 +00:00
)
/* Instantiate an event of the class above; the prototype mirrors the class. */
# define DEFINE_TRANS_RESV_EVENT(name) \
DEFINE_EVENT ( xfs_trans_resv_class , name , \
TP_PROTO ( struct xfs_mount * mp , unsigned int type , \
struct xfs_trans_res * res ) , \
TP_ARGS ( mp , type , res ) )
DEFINE_TRANS_RESV_EVENT ( xfs_trans_resv_calc ) ;
DEFINE_TRANS_RESV_EVENT ( xfs_trans_resv_calc_minlogsize ) ;
2018-01-08 18:51:26 +00:00
2022-04-26 01:38:13 +00:00
/*
 * Record the log reservation size (tr_logres) and log count (tr_logcount)
 * of the reservation passed in, together with the filesystem device.
 */
TRACE_EVENT(xfs_log_get_max_trans_res,
	TP_PROTO(struct xfs_mount *mp, const struct xfs_trans_res *res),
	TP_ARGS(mp, res),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(uint, logres)
		__field(int, logcount)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->logres = res->tr_logres;
		__entry->logcount = res->tr_logcount;
	),
	TP_printk(" dev %d:%d logres %u logcount %d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->logres, __entry->logcount)
);
2018-05-09 14:47:57 +00:00
/*
 * Event class for transaction lifecycle tracepoints.  Records the device,
 * the ticket's transaction id, the transaction flags and the caller's
 * instruction pointer.
 */
DECLARE_EVENT_CLASS(xfs_trans_class,
	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip),
	TP_ARGS(tp, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(uint32_t, tid)
		__field(uint32_t, flags)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = tp->t_mountp->m_super->s_dev;
		/* A transaction without a log ticket is reported as tid 0. */
		__entry->tid = tp->t_ticket ? tp->t_ticket->t_tid : 0;
		__entry->flags = tp->t_flags;
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d trans %x flags 0x%x caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->tid, __entry->flags,
		  (char *)__entry->caller_ip)
)
/* Events that share the transaction class above. */
#define DEFINE_TRANS_EVENT(name) \
DEFINE_EVENT(xfs_trans_class, name, \
	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \
	TP_ARGS(tp, caller_ip))
DEFINE_TRANS_EVENT(xfs_trans_alloc);
DEFINE_TRANS_EVENT(xfs_trans_cancel);
DEFINE_TRANS_EVENT(xfs_trans_commit);
DEFINE_TRANS_EVENT(xfs_trans_dup);
DEFINE_TRANS_EVENT(xfs_trans_free);
DEFINE_TRANS_EVENT(xfs_trans_roll);
DEFINE_TRANS_EVENT(xfs_trans_add_item);
DEFINE_TRANS_EVENT(xfs_trans_commit_items);
DEFINE_TRANS_EVENT(xfs_trans_free_items);
2019-02-07 18:37:14 +00:00
TRACE_EVENT ( xfs_iunlink_update_bucket ,
2024-11-04 04:18:36 +00:00
TP_PROTO ( const struct xfs_perag * pag , unsigned int bucket ,
2019-02-07 18:37:14 +00:00
xfs_agino_t old_ptr , xfs_agino_t new_ptr ) ,
2024-11-04 04:18:36 +00:00
TP_ARGS ( pag , bucket , old_ptr , new_ptr ) ,
2019-02-07 18:37:14 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( unsigned int , bucket )
__field ( xfs_agino_t , old_ptr )
__field ( xfs_agino_t , new_ptr )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2019-02-07 18:37:14 +00:00
__entry - > bucket = bucket ;
__entry - > old_ptr = old_ptr ;
__entry - > new_ptr = new_ptr ;
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x bucket %u old 0x%x new 0x%x " ,
2019-02-07 18:37:14 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > bucket ,
__entry - > old_ptr ,
__entry - > new_ptr )
) ;
2019-02-07 18:37:15 +00:00
TRACE_EVENT ( xfs_iunlink_update_dinode ,
2024-11-04 04:18:33 +00:00
TP_PROTO ( const struct xfs_iunlink_item * iup , xfs_agino_t old_ptr ) ,
TP_ARGS ( iup , old_ptr ) ,
2019-02-07 18:37:15 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agino_t , agino )
__field ( xfs_agino_t , old_ptr )
__field ( xfs_agino_t , new_ptr )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( iup - > pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( iup - > pag ) ;
2024-11-04 04:18:33 +00:00
__entry - > agino =
XFS_INO_TO_AGINO ( iup - > ip - > i_mount , iup - > ip - > i_ino ) ;
2019-02-07 18:37:15 +00:00
__entry - > old_ptr = old_ptr ;
2024-11-04 04:18:33 +00:00
__entry - > new_ptr = iup - > next_agino ;
2019-02-07 18:37:15 +00:00
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x agino 0x%x old 0x%x new 0x%x " ,
2019-02-07 18:37:15 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > agno ,
__entry - > agino ,
__entry - > old_ptr ,
__entry - > new_ptr )
) ;
2023-09-11 15:39:06 +00:00
/*
 * Record an inode's position in the unlinked list: its AG number and
 * per-AG inode number, plus the incore i_prev_unlinked / i_next_unlinked
 * values.
 */
TRACE_EVENT(xfs_iunlink_reload_next,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(xfs_agino_t, prev_agino)
		__field(xfs_agino_t, next_agino)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
		__entry->prev_agino = ip->i_prev_unlinked;
		__entry->next_agino = ip->i_next_unlinked;
	),
	TP_printk(" dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno, __entry->agino,
		  __entry->prev_agino, __entry->next_agino)
);
2023-09-11 15:39:07 +00:00
/*
 * Record the AG number and per-AG inode number of @ip.  The bucket is not
 * stored in the record; it is computed at print time as
 * agino % XFS_AGI_UNLINKED_BUCKETS.
 */
TRACE_EVENT(xfs_inode_reload_unlinked_bucket,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
	),
	TP_printk(" dev %d:%d agno 0x%x agino 0x%x bucket %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno, __entry->agino,
		  __entry->agino % XFS_AGI_UNLINKED_BUCKETS)
);
2019-02-07 18:37:16 +00:00
/*
 * Event class that identifies an inode by AG number and per-AG inode
 * number.  The device is read through the VFS inode's superblock.
 */
DECLARE_EVENT_CLASS(xfs_ag_inode_class,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
	),
	TP_printk(" dev %d:%d agno 0x%x agino 0x%x ",
		  MAJOR(__entry->dev),
		  MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agino)
)
/* Events that share the AG inode class above. */
#define DEFINE_AGINODE_EVENT(name) \
DEFINE_EVENT(xfs_ag_inode_class, name, \
	TP_PROTO(struct xfs_inode *ip), \
	TP_ARGS(ip))
DEFINE_AGINODE_EVENT(xfs_iunlink);
DEFINE_AGINODE_EVENT(xfs_iunlink_remove);
2019-04-12 14:40:25 +00:00
/*
 * Event class for whole-filesystem health tracepoints: the device plus the
 * caller-supplied flags word, printed in hex.
 */
DECLARE_EVENT_CLASS(xfs_fs_corrupt_class,
	TP_PROTO(struct xfs_mount *mp, unsigned int flags),
	TP_ARGS(mp, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d flags 0x%x ",
		  MAJOR(__entry->dev),
		  MINOR(__entry->dev),
		  __entry->flags)
);
/* Events that share the filesystem health class above. */
#define DEFINE_FS_CORRUPT_EVENT(name)	\
DEFINE_EVENT(xfs_fs_corrupt_class, name,	\
	TP_PROTO(struct xfs_mount *mp, unsigned int flags), \
	TP_ARGS(mp, flags))
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_corrupt);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_unfixed_corruption);
2019-04-12 14:40:25 +00:00
2024-11-04 04:18:40 +00:00
/*
 * Event class for per-group health tracepoints.  Records the group's type
 * and number (xg_type / xg_gno) and the caller-supplied flags.  The type is
 * printed symbolically via XG_TYPE_STRINGS as the prefix of the "no" label.
 */
DECLARE_EVENT_CLASS(xfs_group_corrupt_class,
	TP_PROTO(const struct xfs_group *xg, unsigned int flags),
	TP_ARGS(xg, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(enum xfs_group_type, type)
		__field(uint32_t, index)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = xg->xg_mount->m_super->s_dev;
		__entry->type = xg->xg_type;
		__entry->index = xg->xg_gno;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d %sno 0x%x flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->type, XG_TYPE_STRINGS),
		  __entry->index,
		  __entry->flags)
);
/* Events that share the group health class above. */
#define DEFINE_GROUP_CORRUPT_EVENT(name)	\
DEFINE_EVENT(xfs_group_corrupt_class, name,	\
	TP_PROTO(const struct xfs_group *xg, unsigned int flags), \
	TP_ARGS(xg, flags))
DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_sick);
DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_corrupt);
DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_healthy);
DEFINE_GROUP_CORRUPT_EVENT(xfs_group_unfixed_corruption);
2019-04-12 14:40:25 +00:00
/*
 * Event class for per-inode health tracepoints: the device, the filesystem
 * inode number and the caller-supplied flags.
 */
DECLARE_EVENT_CLASS(xfs_inode_corrupt_class,
	TP_PROTO(struct xfs_inode *ip, unsigned int flags),
	TP_ARGS(ip, flags),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->flags = flags;
	),
	TP_printk(" dev %d:%d ino 0x%llx flags 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->flags)
);
/* Events that share the inode health class above. */
#define DEFINE_INODE_CORRUPT_EVENT(name)	\
DEFINE_EVENT(xfs_inode_corrupt_class, name,	\
	TP_PROTO(struct xfs_inode *ip, unsigned int flags), \
	TP_ARGS(ip, flags))
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_corrupt);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption);
2019-04-12 14:40:25 +00:00
2019-07-02 16:39:38 +00:00
TRACE_EVENT ( xfs_iwalk_ag_rec ,
2024-11-04 04:18:36 +00:00
TP_PROTO ( const struct xfs_perag * pag , \
2019-07-02 16:39:38 +00:00
struct xfs_inobt_rec_incore * irec ) ,
2024-11-04 04:18:36 +00:00
TP_ARGS ( pag , irec ) ,
2019-07-02 16:39:38 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_agnumber_t , agno )
__field ( xfs_agino_t , startino )
__field ( uint64_t , freemask )
) ,
TP_fast_assign (
2024-11-04 04:18:38 +00:00
__entry - > dev = pag_mount ( pag ) - > m_super - > s_dev ;
__entry - > agno = pag_agno ( pag ) ;
2019-07-02 16:39:38 +00:00
__entry - > startino = irec - > ir_startino ;
__entry - > freemask = irec - > ir_free ;
) ,
2021-08-17 16:24:26 +00:00
TP_printk ( " dev %d:%d agno 0x%x startino 0x%x freemask 0x%llx " ,
2019-07-02 16:39:38 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) , __entry - > agno ,
__entry - > startino , __entry - > freemask )
)
2019-07-03 14:33:26 +00:00
/*
 * Record the thread count and pid passed in by the caller, together with
 * the filesystem device.
 *
 * The pid field is a pid_t, which is a signed type, so it is printed with
 * %d rather than %u to keep the format specifier matched to the field type.
 */
TRACE_EVENT(xfs_pwork_init,
	TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
	TP_ARGS(mp, nr_threads, pid),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, nr_threads)
		__field(pid_t, pid)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->nr_threads = nr_threads;
		__entry->pid = pid;
	),
	TP_printk(" dev %d:%d nr_threads %u pid %d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->nr_threads, __entry->pid)
)
2019-12-11 21:19:06 +00:00
/*
 * Record the new_dalign value passed in, alongside the root inode number
 * stored in the superblock (sb_rootino) and the caller's calculated root
 * inode number.
 */
TRACE_EVENT(xfs_check_new_dalign,
	TP_PROTO(struct xfs_mount *mp,
		 int new_dalign,
		 xfs_ino_t calc_rootino),
	TP_ARGS(mp, new_dalign, calc_rootino),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(int, new_dalign)
		__field(xfs_ino_t, sb_rootino)
		__field(xfs_ino_t, calc_rootino)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->new_dalign = new_dalign;
		__entry->sb_rootino = mp->m_sb.sb_rootino;
		__entry->calc_rootino = calc_rootino;
	),
	TP_printk(" dev %d:%d new_dalign %d sb_rootino 0x%llx calc_rootino 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->new_dalign,
		  __entry->sb_rootino,
		  __entry->calc_rootino)
)
2020-03-11 17:40:26 +00:00
/*
 * Record the contents of an AG btree cursor's fake root (bc_ag.afake):
 * the btree name, the group number, the root block (af_root), the number
 * of levels and the number of blocks.
 *
 * The root is a per-AG block number, so it is printed in hexadecimal, in
 * line with the conventions at the top of this file (block numbers are
 * formatted in hex).
 */
TRACE_EVENT(xfs_btree_commit_afakeroot,
	TP_PROTO(struct xfs_btree_cur *cur),
	TP_ARGS(cur),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__string(name, cur->bc_ops->name)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(unsigned int, levels)
		__field(unsigned int, blocks)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__assign_str(name);
		__entry->agno = cur->bc_group->xg_gno;
		__entry->agbno = cur->bc_ag.afake->af_root;
		__entry->levels = cur->bc_ag.afake->af_levels;
		__entry->blocks = cur->bc_ag.afake->af_blocks;
	),
	TP_printk(" dev %d:%d %sbt agno 0x%x levels %u blocks %u root 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __get_str(name),
		  __entry->agno,
		  __entry->levels,
		  __entry->blocks,
		  __entry->agbno)
)
2020-03-11 17:42:34 +00:00
/*
 * Record the contents of an inode-rooted btree cursor's fake root
 * (bc_ino.ifake): the btree name, the owning inode split into AG number and
 * per-AG inode number, the fork, and the level and block counts.
 */
TRACE_EVENT(xfs_btree_commit_ifakeroot,
	TP_PROTO(struct xfs_btree_cur *cur),
	TP_ARGS(cur),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__string(name, cur->bc_ops->name)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agino_t, agino)
		__field(unsigned int, levels)
		__field(unsigned int, blocks)
		__field(int, whichfork)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__assign_str(name);
		__entry->agno = XFS_INO_TO_AGNO(cur->bc_mp, cur->bc_ino.ip->i_ino);
		__entry->agino = XFS_INO_TO_AGINO(cur->bc_mp, cur->bc_ino.ip->i_ino);
		__entry->levels = cur->bc_ino.ifake->if_levels;
		__entry->blocks = cur->bc_ino.ifake->if_blocks;
		__entry->whichfork = cur->bc_ino.whichfork;
	),
	TP_printk(" dev %d:%d %sbt agno 0x%x agino 0x%x whichfork %s levels %u blocks %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __get_str(name),
		  __entry->agno, __entry->agino,
		  __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
		  __entry->levels, __entry->blocks)
)
2020-03-11 17:51:50 +00:00
/*
 * Record the geometry values supplied by the caller for one btree level:
 * the level (printed against the cursor's bc_nlevels), the record count for
 * the level, the per-block and desired per-block counts, and the two block
 * totals.
 */
TRACE_EVENT(xfs_btree_bload_level_geometry,
	TP_PROTO(struct xfs_btree_cur *cur,
		 unsigned int level,
		 uint64_t nr_this_level,
		 unsigned int nr_per_block,
		 unsigned int desired_npb,
		 uint64_t blocks,
		 uint64_t blocks_with_extra),
	TP_ARGS(cur, level, nr_this_level, nr_per_block, desired_npb,
		blocks, blocks_with_extra),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__string(name, cur->bc_ops->name)
		__field(unsigned int, level)
		__field(unsigned int, nlevels)
		__field(uint64_t, nr_this_level)
		__field(unsigned int, nr_per_block)
		__field(unsigned int, desired_npb)
		__field(unsigned long long, blocks)
		__field(unsigned long long, blocks_with_extra)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__assign_str(name);
		__entry->level = level;
		__entry->nlevels = cur->bc_nlevels;
		__entry->nr_this_level = nr_this_level;
		__entry->nr_per_block = nr_per_block;
		__entry->desired_npb = desired_npb;
		__entry->blocks = blocks;
		__entry->blocks_with_extra = blocks_with_extra;
	),
	TP_printk(" dev %d:%d %sbt level %u/%u nr_this_level %llu nr_per_block %u desired_npb %u blocks %llu blocks_with_extra %llu ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __get_str(name),
		  __entry->level, __entry->nlevels,
		  __entry->nr_this_level,
		  __entry->nr_per_block, __entry->desired_npb,
		  __entry->blocks, __entry->blocks_with_extra)
)
/*
 * Record one btree block: its level, its index out of nr_blocks, the
 * block's location decoded from @ptr, and the record count.
 */
TRACE_EVENT(xfs_btree_bload_block,
	TP_PROTO(struct xfs_btree_cur *cur,
		 unsigned int level,
		 uint64_t block_idx,
		 uint64_t nr_blocks,
		 union xfs_btree_ptr *ptr,
		 unsigned int nr_records),
	TP_ARGS(cur, level, block_idx, nr_blocks, ptr, nr_records),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__string(name, cur->bc_ops->name)
		__field(unsigned int, level)
		__field(unsigned long long, block_idx)
		__field(unsigned long long, nr_blocks)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(unsigned int, nr_records)
	),
	TP_fast_assign(
		__entry->dev = cur->bc_mp->m_super->s_dev;
		__assign_str(name);
		__entry->level = level;
		__entry->block_idx = block_idx;
		__entry->nr_blocks = nr_blocks;
		if (cur->bc_ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
			/* Short pointer: group number comes from the cursor. */
			__entry->agno = cur->bc_group->xg_gno;
			__entry->agbno = be32_to_cpu(ptr->s);
		} else {
			/* Long pointer: split the fsblock into agno/agbno. */
			xfs_fsblock_t	fsbno = be64_to_cpu(ptr->l);

			__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
			__entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
		}
		__entry->nr_records = nr_records;
	),
	TP_printk(" dev %d:%d %sbt level %u block %llu/%llu agno 0x%x agbno 0x%x recs %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __get_str(name),
		  __entry->level,
		  __entry->block_idx, __entry->nr_blocks,
		  __entry->agno, __entry->agbno,
		  __entry->nr_records)
)
2020-08-24 18:58:01 +00:00
/*
 * Event class recording a [min, max] range supplied by the caller as
 * time64_t values; both are stored and printed as signed long long.
 */
DECLARE_EVENT_CLASS(xfs_timestamp_range_class,
	TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max),
	TP_ARGS(mp, min, max),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(long long, min)
		__field(long long, max)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->min = min;
		__entry->max = max;
	),
	TP_printk(" dev %d:%d min %lld max %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->min, __entry->max)
)
/*
 * Events sharing xfs_timestamp_range_class.  The prototype here uses
 * time64_t so that it matches the TP_PROTO of the class declaration exactly,
 * rather than spelling the same parameters as long long.
 */
#define DEFINE_TIMESTAMP_RANGE_EVENT(name) \
DEFINE_EVENT(xfs_timestamp_range_class, name, \
	TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max), \
	TP_ARGS(mp, min, max))
DEFINE_TIMESTAMP_RANGE_EVENT(xfs_inode_timestamp_range);
DEFINE_TIMESTAMP_RANGE_EVENT(xfs_quota_expiry_range);
2021-06-07 16:34:51 +00:00
/*
 * Event class for inode cache walks.  Records the walk's filter settings
 * (flags, uid, gid, project id, minimum file size, scan limit) and the
 * caller's instruction pointer.  @icw may be NULL, in which case every
 * filter field is recorded as zero.
 */
DECLARE_EVENT_CLASS(xfs_icwalk_class,
	TP_PROTO(struct xfs_mount *mp,
		 struct xfs_icwalk *icw,
		 unsigned long caller_ip),
	TP_ARGS(mp, icw, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(__u32, flags)
		__field(uint32_t, uid)
		__field(uint32_t, gid)
		__field(prid_t, prid)
		__field(__u64, min_file_size)
		__field(long, scan_limit)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		if (icw) {
			__entry->flags = icw->icw_flags;
			/* Map kernel ids through the superblock's user namespace. */
			__entry->uid = from_kuid(mp->m_super->s_user_ns,
						 icw->icw_uid);
			__entry->gid = from_kgid(mp->m_super->s_user_ns,
						 icw->icw_gid);
			__entry->prid = icw->icw_prid;
			__entry->min_file_size = icw->icw_min_file_size;
			__entry->scan_limit = icw->icw_scan_limit;
		} else {
			__entry->flags = 0;
			__entry->uid = 0;
			__entry->gid = 0;
			__entry->prid = 0;
			__entry->min_file_size = 0;
			__entry->scan_limit = 0;
		}
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %ld caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->flags,
		  __entry->uid, __entry->gid, __entry->prid,
		  __entry->min_file_size,
		  __entry->scan_limit,
		  (char *)__entry->caller_ip)
);
/* Events that share the inode cache walk class above. */
#define DEFINE_ICWALK_EVENT(name)	\
DEFINE_EVENT(xfs_icwalk_class, name,	\
	TP_PROTO(struct xfs_mount *mp, struct xfs_icwalk *icw, \
		 unsigned long caller_ip), \
	TP_ARGS(mp, icw, caller_ip))
DEFINE_ICWALK_EVENT(xfs_ioc_free_eofblocks);
DEFINE_ICWALK_EVENT(xfs_blockgc_free_space);
2021-01-23 00:48:38 +00:00
2021-06-18 18:57:05 +00:00
/*
 * Register the XLOG_STATE_* enum values with the trace infrastructure; the
 * class below prints the iclog state symbolically via XLOG_STATE_STRINGS.
 */
TRACE_DEFINE_ENUM(XLOG_STATE_ACTIVE);
TRACE_DEFINE_ENUM(XLOG_STATE_WANT_SYNC);
TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING);
TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC);
TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK);
TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY);

/*
 * Event class for in-core log buffers.  Records the iclog's state,
 * reference count, offset, flags and header LSN (converted from big-endian),
 * plus the caller's instruction pointer.
 */
DECLARE_EVENT_CLASS(xlog_iclog_class,
	TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip),
	TP_ARGS(iclog, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(uint32_t, state)
		__field(int32_t, refcount)
		__field(uint32_t, offset)
		__field(uint32_t, flags)
		__field(unsigned long long, lsn)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = iclog->ic_log->l_mp->m_super->s_dev;
		__entry->state = iclog->ic_state;
		__entry->refcount = atomic_read(&iclog->ic_refcnt);
		__entry->offset = iclog->ic_offset;
		__entry->flags = iclog->ic_flags;
		__entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
		__entry->caller_ip = caller_ip;
	),
	TP_printk(" dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->state, XLOG_STATE_STRINGS),
		  __entry->refcount, __entry->offset, __entry->lsn,
		  __print_flags(__entry->flags, " | ", XLOG_ICL_STRINGS),
		  (char *)__entry->caller_ip)
);
/* Events that share the iclog class above. */
#define DEFINE_ICLOG_EVENT(name)	\
DEFINE_EVENT(xlog_iclog_class, name,	\
	TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), \
	TP_ARGS(iclog, caller_ip))
DEFINE_ICLOG_EVENT(xlog_iclog_activate);
DEFINE_ICLOG_EVENT(xlog_iclog_clean);
DEFINE_ICLOG_EVENT(xlog_iclog_callback);
DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_start);
DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_done);
DEFINE_ICLOG_EVENT(xlog_iclog_force);
DEFINE_ICLOG_EVENT(xlog_iclog_force_lsn);
DEFINE_ICLOG_EVENT(xlog_iclog_get_space);
DEFINE_ICLOG_EVENT(xlog_iclog_release);
DEFINE_ICLOG_EVENT(xlog_iclog_switch);
DEFINE_ICLOG_EVENT(xlog_iclog_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_syncing);
DEFINE_ICLOG_EVENT(xlog_iclog_sync_done);
DEFINE_ICLOG_EVENT(xlog_iclog_want_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_wait_on);
DEFINE_ICLOG_EVENT(xlog_iclog_write);
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and node add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 05:12:52 +00:00
/* Register the initial and shortform XFS_DAS_* attr states with the tracer. */
TRACE_DEFINE_ENUM(XFS_DAS_UNINIT);
TRACE_DEFINE_ENUM(XFS_DAS_SF_ADD);
TRACE_DEFINE_ENUM(XFS_DAS_SF_REMOVE);
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
To fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, then we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag in the last transaction that logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 05:12:56 +00:00
/* Register the leaf-format add/remove XFS_DAS_* attr states with the tracer. */
TRACE_DEFINE_ENUM(XFS_DAS_LEAF_ADD);
TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE);
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
To fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, then we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag in the last transaction that logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 05:12:56 +00:00
/*
 * Register XFS_DAS_* attr state machine enum values with the tracing core.
 * NOTE(review): presumably this is what allows the symbolic state names
 * printed by xfs_das_state_class to be resolved by userspace trace tools;
 * confirm against the TRACE_DEFINE_ENUM documentation.
 */
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_ADD ) ;
2022-05-12 05:12:56 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_REMOVE ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_SET_RMT ) ;
2022-05-12 05:12:54 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_ALLOC_RMT ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_REPLACE ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_REMOVE_OLD ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_REMOVE_RMT ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_LEAF_REMOVE_ATTR ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_SET_RMT ) ;
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_ALLOC_RMT ) ;
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_REPLACE ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_REMOVE_OLD ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_REMOVE_RMT ) ;
2022-05-12 05:12:55 +00:00
TRACE_DEFINE_ENUM ( XFS_DAS_NODE_REMOVE_ATTR ) ;
TRACE_DEFINE_ENUM ( XFS_DAS_DONE ) ;
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and node add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 05:12:52 +00:00
2021-08-08 15:27:13 +00:00
/*
 * Event class for the attr state machine ("das") tracepoints.
 *
 * @das: state value to record; printed symbolically via XFS_DAS_STRINGS.
 * @ip:  inode being operated on; only its inode number (i_ino) is recorded.
 */
DECLARE_EVENT_CLASS ( xfs_das_state_class ,
TP_PROTO ( int das , struct xfs_inode * ip ) ,
TP_ARGS ( das , ip ) ,
TP_STRUCT__entry (
__field ( int , das )
__field ( xfs_ino_t , ino )
) ,
TP_fast_assign (
__entry - > das = das ;
__entry - > ino = ip - > i_ino ;
) ,
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 05:12:52 +00:00
TP_printk ( " state change %s ino 0x%llx " ,
__print_symbolic ( __entry - > das , XFS_DAS_STRINGS ) ,
__entry - > ino )
2021-08-08 15:27:13 +00:00
)
/*
 * Declare a tracepoint belonging to xfs_das_state_class.  Every event
 * created this way takes the state value and the inode, as in the class.
 */
# define DEFINE_DAS_STATE_EVENT(name) \
DEFINE_EVENT ( xfs_das_state_class , name , \
TP_PROTO ( int das , struct xfs_inode * ip ) , \
TP_ARGS ( das , ip ) )
DEFINE_DAS_STATE_EVENT ( xfs_attr_sf_addname_return ) ;
DEFINE_DAS_STATE_EVENT ( xfs_attr_set_iter_return ) ;
2022-05-11 07:01:22 +00:00
DEFINE_DAS_STATE_EVENT ( xfs_attr_leaf_addname_return ) ;
2021-08-08 15:27:13 +00:00
DEFINE_DAS_STATE_EVENT ( xfs_attr_node_addname_return ) ;
DEFINE_DAS_STATE_EVENT ( xfs_attr_remove_iter_return ) ;
2022-05-12 05:12:55 +00:00
DEFINE_DAS_STATE_EVENT ( xfs_attr_rmtval_alloc ) ;
2021-08-08 15:27:13 +00:00
DEFINE_DAS_STATE_EVENT ( xfs_attr_rmtval_remove_return ) ;
2022-05-11 07:05:23 +00:00
DEFINE_DAS_STATE_EVENT ( xfs_attr_defer_add ) ;
2021-08-11 00:00:54 +00:00
/*
 * Tracepoint for a forced shutdown.
 *
 * Records the device of @mp, the @ptag and @flags bitmasks (decoded at print
 * time through XFS_PTAG_STRINGS and XFS_SHUTDOWN_STRINGS), and the @fname
 * string and @line_num passed in by the caller.
 */
TRACE_EVENT ( xfs_force_shutdown ,
TP_PROTO ( struct xfs_mount * mp , int ptag , int flags , const char * fname ,
int line_num ) ,
TP_ARGS ( mp , ptag , flags , fname , line_num ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , ptag )
__field ( int , flags )
__string ( fname , fname )
__field ( int , line_num )
) ,
TP_fast_assign (
__entry - > dev = mp - > m_super - > s_dev ;
__entry - > ptag = ptag ;
__entry - > flags = flags ;
2024-05-16 17:34:54 +00:00
__assign_str ( fname ) ;
2021-08-11 00:00:54 +00:00
__entry - > line_num = line_num ;
) ,
TP_printk ( " dev %d:%d tag %s flags %s file %s line_num %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__print_flags ( __entry - > ptag , " | " , XFS_PTAG_STRINGS ) ,
__print_flags ( __entry - > flags , " | " , XFS_SHUTDOWN_STRINGS ) ,
__get_str ( fname ) ,
__entry - > line_num )
) ;
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
# ifdef CONFIG_XFS_DRAIN_INTENTS
2024-11-04 04:18:41 +00:00
/*
 * Event class for the per-group intent drain counter.
 *
 * @xg:        group being traced; the device, group type (xg_type) and group
 *             number (xg_gno) are recorded, together with a snapshot of
 *             xg_intents_drain.dr_count taken with atomic_read().
 * @caller_ip: address recorded as-is and printed with %pS.
 */
DECLARE_EVENT_CLASS ( xfs_group_intents_class ,
TP_PROTO ( const struct xfs_group * xg , void * caller_ip ) ,
TP_ARGS ( xg , caller_ip ) ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
TP_STRUCT__entry (
__field ( dev_t , dev )
2024-11-04 04:18:41 +00:00
__field ( enum xfs_group_type , type )
__field ( uint32_t , index )
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__field ( long , nr_intents )
__field ( void * , caller_ip )
) ,
TP_fast_assign (
2024-11-04 04:18:41 +00:00
__entry - > dev = xg - > xg_mount - > m_super - > s_dev ;
__entry - > type = xg - > xg_type ;
__entry - > index = xg - > xg_gno ;
__entry - > nr_intents =
atomic_read ( & xg - > xg_intents_drain . dr_count ) ;
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > caller_ip = caller_ip ;
) ,
2024-11-04 04:18:41 +00:00
TP_printk ( " dev %d:%d %sno 0x%x intents %ld caller %pS " ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-11-04 04:18:41 +00:00
__print_symbolic ( __entry - > type , XG_TYPE_STRINGS ) ,
__entry - > index ,
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
__entry - > nr_intents ,
__entry - > caller_ip )
) ;
2024-11-04 04:18:41 +00:00
/*
 * Declare a tracepoint belonging to xfs_group_intents_class; each event takes
 * the group and a caller address.
 */
# define DEFINE_GROUP_INTENTS_EVENT(name) \
DEFINE_EVENT ( xfs_group_intents_class , name , \
TP_PROTO ( const struct xfs_group * xg , void * caller_ip ) , \
TP_ARGS ( xg , caller_ip ) )
DEFINE_GROUP_INTENTS_EVENT ( xfs_group_intent_hold ) ;
DEFINE_GROUP_INTENTS_EVENT ( xfs_group_intent_rele ) ;
DEFINE_GROUP_INTENTS_EVENT ( xfs_group_wait_intents ) ;
xfs: allow queued AG intents to drain before scrubbing
When a writer thread executes a chain of log intent items, the AG header
buffer locks will cycle during a transaction roll to get from one intent
item to the next in a chain. Although scrub takes all AG header buffer
locks, this isn't sufficient to guard against scrub checking an AG while
that writer thread is in the middle of finishing a chain because there's
no higher level locking primitive guarding allocation groups.
When there's a collision, cross-referencing between data structures
(e.g. rmapbt and refcountbt) yields false corruption events; if repair
is running, this results in incorrect repairs, which is catastrophic.
Fix this by adding to the perag structure the count of active intents
and make scrub wait until it has both AG header buffer locks and the
intent counter reaches zero.
One quirk of the drain code is that deferred bmap updates also bump and
drop the intent counter. A fundamental decision made during the design
phase of the reverse mapping feature is that updates to the rmapbt
records are always made by the same code that updates the primary
metadata. In other words, callers of bmapi functions expect that the
bmapi functions will queue deferred rmap updates.
Some parts of the reflink code queue deferred refcount (CUI) and bmap
(BUI) updates in the same head transaction, but the deferred work
manager completely finishes the CUI before the BUI work is started. As
a result, the CUI drops the intent count long before the deferred rmap
(RUI) update even has a chance to bump the intent count. The only way
to keep the intent count elevated between the CUI and RUI is for the BUI
to bump the counter until the RUI has been created.
A second quirk of the intent drain code is that deferred work items must
increment the intent counter as soon as the work item is added to the
transaction. When a BUI completes and queues an RUI, the RUI must
increment the counter before the BUI decrements it. The only way to
accomplish this is to require that the counter be bumped as soon as the
deferred work item is created in memory.
In the next patches we'll improve on this facility, but this patch
provides the basic functionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 01:59:58 +00:00
# endif /* CONFIG_XFS_DRAIN_INTENTS */
2024-02-22 20:43:21 +00:00
# ifdef CONFIG_XFS_MEMORY_BUFS
/*
 * Tracepoint xmbuf_create.
 *
 * @btp: buffer target whose file (bt_file) is being traced.
 *
 * Records the device of the buftarg's mount, the inode number of bt_file and
 * the file's path.  If the path cannot be formatted, the placeholder string
 * is recorded instead.
 */
TRACE_EVENT ( xmbuf_create ,
TP_PROTO ( struct xfs_buftarg * btp ) ,
TP_ARGS ( btp ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( unsigned long , ino )
2024-07-11 05:43:53 +00:00
__array ( char , pathname , MAXNAMELEN )
2024-02-22 20:43:21 +00:00
) ,
TP_fast_assign (
char * path ;
struct file * file = btp - > bt_file ;
2024-03-07 23:13:52 +00:00
__entry - > dev = btp - > bt_mount - > m_super - > s_dev ;
2024-02-22 20:43:21 +00:00
__entry - > ino = file_inode ( file ) - > i_ino ;
2024-07-11 05:43:53 +00:00
path = file_path ( file , __entry - > pathname , MAXNAMELEN ) ;
2024-02-22 20:43:21 +00:00
if ( IS_ERR ( path ) )
2024-07-11 05:43:53 +00:00
strncpy ( __entry - > pathname , " (unknown) " ,
sizeof ( __entry - > pathname ) ) ;
/*
 * file_path() returns a pointer into the buffer it was given, and the
 * name usually does not start at offset 0.  TP_printk prints the array
 * from its first byte, so slide the name (including its NUL) to the
 * front.  memmove() is used because source and destination overlap.
 */
else if ( path != __entry - > pathname )
memmove ( __entry - > pathname , path , strlen ( path ) + 1 ) ;
2024-02-22 20:43:21 +00:00
) ,
2024-03-07 23:13:52 +00:00
TP_printk ( " dev %d:%d xmino 0x%lx path '%s' " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:43:21 +00:00
__entry - > ino ,
__entry - > pathname )
) ;
/*
 * Tracepoint xmbuf_free.
 *
 * @btp: buffer target whose file (bt_file) is being traced.
 *
 * Records the device of the buftarg's mount plus, from the file's inode: the
 * inode number, the size read with i_size_read(), and a byte count computed
 * as (i_blocks << SECTOR_SHIFT) + i_bytes.
 */
TRACE_EVENT ( xmbuf_free ,
TP_PROTO ( struct xfs_buftarg * btp ) ,
TP_ARGS ( btp ) ,
TP_STRUCT__entry (
2024-03-07 23:13:52 +00:00
__field ( dev_t , dev )
2024-02-22 20:43:21 +00:00
__field ( unsigned long , ino )
__field ( unsigned long long , bytes )
__field ( loff_t , size )
) ,
TP_fast_assign (
struct file * file = btp - > bt_file ;
struct inode * inode = file_inode ( file ) ;
2024-03-07 23:13:52 +00:00
__entry - > dev = btp - > bt_mount - > m_super - > s_dev ;
2024-02-22 20:43:21 +00:00
__entry - > size = i_size_read ( inode ) ;
__entry - > bytes = ( inode - > i_blocks < < SECTOR_SHIFT ) + inode - > i_bytes ;
__entry - > ino = inode - > i_ino ;
) ,
2024-03-07 23:13:52 +00:00
TP_printk ( " dev %d:%d xmino 0x%lx mem_bytes 0x%llx isize 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
2024-02-22 20:43:21 +00:00
__entry - > ino ,
__entry - > bytes ,
__entry - > size )
) ;
# endif /* CONFIG_XFS_MEMORY_BUFS */
2024-02-22 20:43:35 +00:00
# ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Tracepoint xfbtree_init.
 *
 * @mp:   mount; accepted but not recorded by this event.
 * @xfbt: in-memory btree; the inode number of its target's file, its owner,
 *        and its maxrecs[]/minrecs[] values are recorded.  Index 0 is
 *        reported as the leaf limit and index 1 as the node limit.
 * @ops:  btree ops pointer, stored as-is and printed with %pS.
 */
TRACE_EVENT ( xfbtree_init ,
TP_PROTO ( struct xfs_mount * mp , struct xfbtree * xfbt ,
const struct xfs_btree_ops * ops ) ,
TP_ARGS ( mp , xfbt , ops ) ,
TP_STRUCT__entry (
__field ( const void * , btree_ops )
__field ( unsigned long , xfino )
__field ( unsigned int , leaf_mxr )
__field ( unsigned int , leaf_mnr )
__field ( unsigned int , node_mxr )
__field ( unsigned int , node_mnr )
__field ( unsigned long long , owner )
) ,
TP_fast_assign (
__entry - > btree_ops = ops ;
__entry - > xfino = file_inode ( xfbt - > target - > bt_file ) - > i_ino ;
__entry - > leaf_mxr = xfbt - > maxrecs [ 0 ] ;
__entry - > node_mxr = xfbt - > maxrecs [ 1 ] ;
__entry - > leaf_mnr = xfbt - > minrecs [ 0 ] ;
__entry - > node_mnr = xfbt - > minrecs [ 1 ] ;
__entry - > owner = xfbt - > owner ;
) ,
TP_printk ( " xfino 0x%lx btree_ops %pS owner 0x%llx leaf_mxr %u leaf_mnr %u node_mxr %u node_mnr %u " ,
__entry - > xfino ,
__entry - > btree_ops ,
__entry - > owner ,
__entry - > leaf_mxr ,
__entry - > leaf_mnr ,
__entry - > node_mxr ,
__entry - > node_mnr )
) ;
/*
 * Event class for buffers associated with an in-memory btree.
 *
 * @xfbt: in-memory btree; only the inode number of its target's file is
 *        recorded.
 * @bp:   buffer; its daddr, length (b_length), hold count, pin count,
 *        b_sema.count and b_flags are recorded.  Flags are decoded at print
 *        time through XFS_BUF_FLAGS.
 */
DECLARE_EVENT_CLASS ( xfbtree_buf_class ,
TP_PROTO ( struct xfbtree * xfbt , struct xfs_buf * bp ) ,
TP_ARGS ( xfbt , bp ) ,
TP_STRUCT__entry (
__field ( unsigned long , xfino )
__field ( xfs_daddr_t , bno )
__field ( int , nblks )
__field ( int , hold )
__field ( int , pincount )
__field ( unsigned int , lockval )
__field ( unsigned int , flags )
) ,
TP_fast_assign (
__entry - > xfino = file_inode ( xfbt - > target - > bt_file ) - > i_ino ;
__entry - > bno = xfs_buf_daddr ( bp ) ;
__entry - > nblks = bp - > b_length ;
__entry - > hold = atomic_read ( & bp - > b_hold ) ;
__entry - > pincount = atomic_read ( & bp - > b_pin_count ) ;
__entry - > lockval = bp - > b_sema . count ;
__entry - > flags = bp - > b_flags ;
) ,
TP_printk ( " xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s " ,
__entry - > xfino ,
( unsigned long long ) __entry - > bno ,
__entry - > nblks ,
__entry - > hold ,
__entry - > pincount ,
__entry - > lockval ,
__print_flags ( __entry - > flags , " | " , XFS_BUF_FLAGS ) )
)
/*
 * Declare a tracepoint belonging to xfbtree_buf_class; each event takes the
 * in-memory btree and a buffer.
 */
# define DEFINE_XFBTREE_BUF_EVENT(name) \
DEFINE_EVENT ( xfbtree_buf_class , name , \
TP_PROTO ( struct xfbtree * xfbt , struct xfs_buf * bp ) , \
TP_ARGS ( xfbt , bp ) )
DEFINE_XFBTREE_BUF_EVENT ( xfbtree_create_root_buf ) ;
DEFINE_XFBTREE_BUF_EVENT ( xfbtree_trans_commit_buf ) ;
DEFINE_XFBTREE_BUF_EVENT ( xfbtree_trans_cancel_buf ) ;
/*
 * Event class for block allocation and freeing in an in-memory btree.
 *
 * @xfbt:    in-memory btree; only the inode number of its target's file is
 *           recorded.
 * @cur:     btree cursor; the btree name (bc_ops->name) and the number of
 *           levels (bc_nlevels) are recorded.
 * @fileoff: file offset recorded as-is and printed in hex.
 */
DECLARE_EVENT_CLASS ( xfbtree_freesp_class ,
TP_PROTO ( struct xfbtree * xfbt , struct xfs_btree_cur * cur ,
xfs_fileoff_t fileoff ) ,
TP_ARGS ( xfbt , cur , fileoff ) ,
TP_STRUCT__entry (
__field ( unsigned long , xfino )
__string ( btname , cur - > bc_ops - > name )
__field ( int , nlevels )
__field ( xfs_fileoff_t , fileoff )
) ,
TP_fast_assign (
__entry - > xfino = file_inode ( xfbt - > target - > bt_file ) - > i_ino ;
2024-05-16 17:34:54 +00:00
__assign_str ( btname ) ;
2024-02-22 20:43:35 +00:00
__entry - > nlevels = cur - > bc_nlevels ;
__entry - > fileoff = fileoff ;
) ,
TP_printk ( " xfino 0x%lx %sbt nlevels %d fileoff 0x%llx " ,
__entry - > xfino ,
__get_str ( btname ) ,
__entry - > nlevels ,
( unsigned long long ) __entry - > fileoff )
)
/*
 * Declare a tracepoint belonging to xfbtree_freesp_class; each event takes
 * the in-memory btree, a cursor and a file offset.
 */
# define DEFINE_XFBTREE_FREESP_EVENT(name) \
DEFINE_EVENT ( xfbtree_freesp_class , name , \
TP_PROTO ( struct xfbtree * xfbt , struct xfs_btree_cur * cur , \
xfs_fileoff_t fileoff ) , \
TP_ARGS ( xfbt , cur , fileoff ) )
DEFINE_XFBTREE_FREESP_EVENT ( xfbtree_alloc_block ) ;
DEFINE_XFBTREE_FREESP_EVENT ( xfbtree_free_block ) ;
# endif /* CONFIG_XFS_BTREE_IN_MEM */
2024-04-15 21:54:17 +00:00
/* exchmaps tracepoints */
/*
 * Value/name pairs for the XFS_EXCHMAPS_* flags.
 * NOTE(review): presumably consumed by a __print_flags()-style decoder in an
 * event class outside this part of the file; confirm at the use site.
 */
# define XFS_EXCHMAPS_STRINGS \
{ XFS_EXCHMAPS_ATTR_FORK , " ATTRFORK " } , \
{ XFS_EXCHMAPS_SET_SIZES , " SETSIZES " } , \
{ XFS_EXCHMAPS_INO1_WRITTEN , " INO1_WRITTEN " } , \
{ XFS_EXCHMAPS_CLEAR_INO1_REFLINK , " CLEAR_INO1_REFLINK " } , \
2024-04-15 21:54:20 +00:00
{ XFS_EXCHMAPS_CLEAR_INO2_REFLINK , " CLEAR_INO2_REFLINK " } , \
{ __XFS_EXCHMAPS_INO2_SHORTFORM , " INO2_SF " }
2024-04-15 21:54:17 +00:00
/*
 * exchmaps tracepoints built from the DEFINE_INODE_IREC_EVENT and
 * DEFINE_ITRUNC_EVENT helpers, which are defined elsewhere in this file.
 */
DEFINE_INODE_IREC_EVENT ( xfs_exchmaps_mapping1_skip ) ;
DEFINE_INODE_IREC_EVENT ( xfs_exchmaps_mapping1 ) ;
DEFINE_INODE_IREC_EVENT ( xfs_exchmaps_mapping2 ) ;
DEFINE_ITRUNC_EVENT ( xfs_exchmaps_update_inode_size ) ;
2024-04-15 21:54:18 +00:00
/*
 * Symbolic names for the "whichfile" argument of xfs_exchrange_inode_class:
 * 1 is the first file and 2 is the second.
 */
# define XFS_EXCHRANGE_INODES \
{ 1 , " file1 " } , \
{ 2 , " file2 " }
/*
 * Event class describing one of the two inodes taking part in an exchange
 * range operation.
 *
 * @ip:        inode; the device, inode number, data fork format
 *             (i_df.if_format), data fork extent count (i_df.if_nextents)
 *             and the value of xfs_inode_fork_boff() are recorded.
 * @whichfile: selects the label printed via XFS_EXCHRANGE_INODES.
 *
 * Only fields that are assigned in TP_fast_assign are declared, so that no
 * uninitialised bytes end up in the trace record.
 */
DECLARE_EVENT_CLASS ( xfs_exchrange_inode_class ,
TP_PROTO ( struct xfs_inode * ip , int whichfile ) ,
TP_ARGS ( ip , whichfile ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( int , whichfile )
__field ( xfs_ino_t , ino )
__field ( int , format )
__field ( xfs_extnum_t , nex )
__field ( int , fork_off )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip ) - > i_sb - > s_dev ;
__entry - > whichfile = whichfile ;
__entry - > ino = ip - > i_ino ;
__entry - > format = ip - > i_df . if_format ;
__entry - > nex = ip - > i_df . if_nextents ;
__entry - > fork_off = xfs_inode_fork_boff ( ip ) ;
) ,
TP_printk ( " dev %d:%d ino 0x%llx whichfile %s format %s num_extents %llu forkoff 0x%x " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > ino ,
__print_symbolic ( __entry - > whichfile , XFS_EXCHRANGE_INODES ) ,
__print_symbolic ( __entry - > format , XFS_INODE_FORMAT_STR ) ,
__entry - > nex ,
__entry - > fork_off )
)
/*
 * Declare a tracepoint belonging to xfs_exchrange_inode_class; each event
 * takes an inode and a "whichfile" selector.  xfs_exchrange_error uses the
 * DEFINE_INODE_ERROR_EVENT helper defined elsewhere in this file.
 */
# define DEFINE_EXCHRANGE_INODE_EVENT(name) \
DEFINE_EVENT ( xfs_exchrange_inode_class , name , \
TP_PROTO ( struct xfs_inode * ip , int whichfile ) , \
TP_ARGS ( ip , whichfile ) )
DEFINE_EXCHRANGE_INODE_EVENT ( xfs_exchrange_before ) ;
DEFINE_EXCHRANGE_INODE_EVENT ( xfs_exchrange_after ) ;
DEFINE_INODE_ERROR_EVENT ( xfs_exchrange_error ) ;
/*
 * Value/name pairs for the exchange-range flags; xfs_exchrange_class decodes
 * its flags field with these via __print_flags_u64().
 */
# define XFS_EXCHANGE_RANGE_FLAGS_STRS \
{ XFS_EXCHANGE_RANGE_TO_EOF , " TO_EOF " } , \
{ XFS_EXCHANGE_RANGE_DSYNC , " DSYNC " } , \
{ XFS_EXCHANGE_RANGE_DRY_RUN , " DRY_RUN " } , \
{ XFS_EXCHANGE_RANGE_FILE1_WRITTEN , " F1_WRITTEN " } , \
{ __XFS_EXCHANGE_RANGE_UPD_CMTIME1 , " CMTIME1 " } , \
2024-08-30 22:36:47 +00:00
{ __XFS_EXCHANGE_RANGE_UPD_CMTIME2 , " CMTIME2 " } , \
{ __XFS_EXCHANGE_RANGE_CHECK_FRESH2 , " FRESH2 " }
2024-04-15 21:54:18 +00:00
/*
 * File exchange-range tracepoint class.
 *
 * @fxr: exchange request; its file1_offset, file2_offset, length and flags
 *       are recorded.  Flags are decoded with XFS_EXCHANGE_RANGE_FLAGS_STRS.
 * @ip1: first inode; inode number, VFS i_size and i_disk_size are recorded.
 *       The device number is also taken from this inode's superblock.
 * @ip2: second inode; inode number, VFS i_size and i_disk_size are recorded.
 */
DECLARE_EVENT_CLASS ( xfs_exchrange_class ,
TP_PROTO ( const struct xfs_exchrange * fxr , struct xfs_inode * ip1 ,
struct xfs_inode * ip2 ) ,
TP_ARGS ( fxr , ip1 , ip2 ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , ip1_ino )
__field ( loff_t , ip1_isize )
__field ( loff_t , ip1_disize )
__field ( xfs_ino_t , ip2_ino )
__field ( loff_t , ip2_isize )
__field ( loff_t , ip2_disize )
__field ( loff_t , file1_offset )
__field ( loff_t , file2_offset )
__field ( unsigned long long , length )
__field ( unsigned long long , flags )
) ,
TP_fast_assign (
__entry - > dev = VFS_I ( ip1 ) - > i_sb - > s_dev ;
__entry - > ip1_ino = ip1 - > i_ino ;
__entry - > ip1_isize = VFS_I ( ip1 ) - > i_size ;
__entry - > ip1_disize = ip1 - > i_disk_size ;
__entry - > ip2_ino = ip2 - > i_ino ;
__entry - > ip2_isize = VFS_I ( ip2 ) - > i_size ;
__entry - > ip2_disize = ip2 - > i_disk_size ;
__entry - > file1_offset = fxr - > file1_offset ;
__entry - > file2_offset = fxr - > file2_offset ;
__entry - > length = fxr - > length ;
__entry - > flags = fxr - > flags ;
) ,
TP_printk ( " dev %d:%d flags %s bytecount 0x%llx "
" ino1 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> "
" ino2 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__print_flags_u64 ( __entry - > flags , " | " , XFS_EXCHANGE_RANGE_FLAGS_STRS ) ,
__entry - > length ,
__entry - > ip1_ino ,
__entry - > ip1_isize ,
__entry - > ip1_disize ,
__entry - > file1_offset ,
__entry - > ip2_ino ,
__entry - > ip2_isize ,
__entry - > ip2_disize ,
__entry - > file2_offset )
)
/* Instantiate one named event of xfs_exchrange_class. */
#define DEFINE_EXCHRANGE_EVENT(name) \
DEFINE_EVENT(xfs_exchrange_class, name, \
	TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip1, \
		 struct xfs_inode *ip2), \
	TP_ARGS(fxr, ip1, ip2))

DEFINE_EXCHRANGE_EVENT(xfs_exchrange_prep);
DEFINE_EXCHRANGE_EVENT(xfs_exchrange_flush);
DEFINE_EXCHRANGE_EVENT(xfs_exchrange_mappings);
2024-08-30 22:36:47 +00:00
/*
 * Record two sets of file2 identity/timestamp data side by side:
 *
 *  - "ino":  the inode number of @ip2 with the ctime and mtime read from its
 *            VFS inode via inode_get_ctime()/inode_get_mtime();
 *  - "file": the inode number and timestamps carried in the request
 *            (fxr->file2_ino, fxr->file2_mtime, fxr->file2_ctime).
 *
 * Seconds are stored as long long, nanoseconds as int.
 */
TRACE_EVENT(xfs_exchrange_freshness,
	TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip2),
	TP_ARGS(fxr, ip2),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ip2_ino)
		__field(long long, ip2_mtime)
		__field(long long, ip2_ctime)
		__field(int, ip2_mtime_nsec)
		__field(int, ip2_ctime_nsec)
		__field(xfs_ino_t, file2_ino)
		__field(long long, file2_mtime)
		__field(long long, file2_ctime)
		__field(int, file2_mtime_nsec)
		__field(int, file2_ctime_nsec)
	),
	TP_fast_assign(
		struct inode		*vfs2 = VFS_I(ip2);
		struct timespec64	cur_ctime = inode_get_ctime(vfs2);
		struct timespec64	cur_mtime = inode_get_mtime(vfs2);

		__entry->dev = vfs2->i_sb->s_dev;

		/* what the inode currently says */
		__entry->ip2_ino = ip2->i_ino;
		__entry->ip2_ctime = cur_ctime.tv_sec;
		__entry->ip2_ctime_nsec = cur_ctime.tv_nsec;
		__entry->ip2_mtime = cur_mtime.tv_sec;
		__entry->ip2_mtime_nsec = cur_mtime.tv_nsec;

		/* what the request carries */
		__entry->file2_ino = fxr->file2_ino;
		__entry->file2_mtime = fxr->file2_mtime.tv_sec;
		__entry->file2_mtime_nsec = fxr->file2_mtime.tv_nsec;
		__entry->file2_ctime = fxr->file2_ctime.tv_sec;
		__entry->file2_ctime_nsec = fxr->file2_ctime.tv_nsec;
	),
	TP_printk(" dev %d:%d "
		  " ino 0x%llx mtime %lld:%d ctime %lld:%d -> "
		  " file 0x%llx mtime %lld:%d ctime %lld:%d ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ip2_ino,
		  __entry->ip2_mtime,
		  __entry->ip2_mtime_nsec,
		  __entry->ip2_ctime,
		  __entry->ip2_ctime_nsec,
		  __entry->file2_ino,
		  __entry->file2_mtime,
		  __entry->file2_mtime_nsec,
		  __entry->file2_ctime,
		  __entry->file2_ctime_nsec)
);
2024-04-15 21:54:17 +00:00
/*
 * Record the two block counts passed by the caller (bmbt_blocks and
 * rmapbt_blocks) together with the device of @mp.  Both counts are printed
 * in hexadecimal.
 */
TRACE_EVENT(xfs_exchmaps_overhead,
	TP_PROTO(struct xfs_mount *mp, unsigned long long bmbt_blocks,
		 unsigned long long rmapbt_blocks),
	TP_ARGS(mp, bmbt_blocks, rmapbt_blocks),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long long, bmbt_blocks)
		__field(unsigned long long, rmapbt_blocks)
	),
	TP_fast_assign(
		__entry->rmapbt_blocks = rmapbt_blocks;
		__entry->bmbt_blocks = bmbt_blocks;
		__entry->dev = mp->m_super->s_dev;
	),
	TP_printk(" dev %d:%d bmbt_blocks 0x%llx rmapbt_blocks 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->bmbt_blocks,
		  __entry->rmapbt_blocks)
);
/*
 * Tracepoint class that snapshots an exchmaps request (@req): both inode
 * numbers, both starting file offsets, the block count, the request flags
 * (decoded with XFS_EXCHMAPS_STRINGS), the per-file bcount/rtbcount values,
 * resblks and nr_exchanges.  The device comes from req->ip1's mount.
 */
DECLARE_EVENT_CLASS(xfs_exchmaps_estimate_class,
	TP_PROTO(const struct xfs_exchmaps_req *req),
	TP_ARGS(req),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino1)
		__field(xfs_ino_t, ino2)
		__field(xfs_fileoff_t, startoff1)
		__field(xfs_fileoff_t, startoff2)
		__field(xfs_filblks_t, blockcount)
		__field(uint64_t, flags)
		__field(xfs_filblks_t, ip1_bcount)
		__field(xfs_filblks_t, ip2_bcount)
		__field(xfs_filblks_t, ip1_rtbcount)
		__field(xfs_filblks_t, ip2_rtbcount)
		__field(unsigned long long, resblks)
		__field(unsigned long long, nr_exchanges)
	),
	TP_fast_assign(
		__entry->dev = req->ip1->i_mount->m_super->s_dev;

		/* file 1 */
		__entry->ino1 = req->ip1->i_ino;
		__entry->startoff1 = req->startoff1;
		__entry->ip1_bcount = req->ip1_bcount;
		__entry->ip1_rtbcount = req->ip1_rtbcount;

		/* file 2 */
		__entry->ino2 = req->ip2->i_ino;
		__entry->startoff2 = req->startoff2;
		__entry->ip2_bcount = req->ip2_bcount;
		__entry->ip2_rtbcount = req->ip2_rtbcount;

		/* whole-request values */
		__entry->blockcount = req->blockcount;
		__entry->flags = req->flags;
		__entry->resblks = req->resblks;
		__entry->nr_exchanges = req->nr_exchanges;
	),
	TP_printk(" dev %d:%d ino1 0x%llx fileoff1 0x%llx ino2 0x%llx fileoff2 0x%llx fsbcount 0x%llx flags (%s) bcount1 0x%llx rtbcount1 0x%llx bcount2 0x%llx rtbcount2 0x%llx resblks 0x%llx nr_exchanges %llu ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino1, __entry->startoff1,
		  __entry->ino2, __entry->startoff2,
		  __entry->blockcount,
		  __print_flags_u64(__entry->flags, " | ", XFS_EXCHMAPS_STRINGS),
		  __entry->ip1_bcount,
		  __entry->ip1_rtbcount,
		  __entry->ip2_bcount,
		  __entry->ip2_rtbcount,
		  __entry->resblks,
		  __entry->nr_exchanges)
);
/* Instantiate one named event of xfs_exchmaps_estimate_class. */
#define DEFINE_EXCHMAPS_ESTIMATE_EVENT(name) \
DEFINE_EVENT(xfs_exchmaps_estimate_class, name, \
	TP_PROTO(const struct xfs_exchmaps_req *req), \
	TP_ARGS(req))

DEFINE_EXCHMAPS_ESTIMATE_EVENT(xfs_exchmaps_initial_estimate);
DEFINE_EXCHMAPS_ESTIMATE_EVENT(xfs_exchmaps_final_estimate);
/*
 * Tracepoint class describing an exchmaps intent (@xmi) on mount @mp.
 *
 * Recorded per file: the inode number, the starting file offset, the
 * inode's i_disk_size ("isize") and the size stored in the intent
 * (xmi_isize1/xmi_isize2, printed as "newisize").  Also recorded: the block
 * count and the intent flags, decoded with XFS_EXCHMAPS_STRINGS.
 */
DECLARE_EVENT_CLASS(xfs_exchmaps_intent_class,
	TP_PROTO(struct xfs_mount *mp, const struct xfs_exchmaps_intent *xmi),
	TP_ARGS(mp, xmi),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino1)
		__field(xfs_ino_t, ino2)
		__field(uint64_t, flags)
		__field(xfs_fileoff_t, startoff1)
		__field(xfs_fileoff_t, startoff2)
		__field(xfs_filblks_t, blockcount)
		__field(xfs_fsize_t, isize1)
		__field(xfs_fsize_t, isize2)
		__field(xfs_fsize_t, new_isize1)
		__field(xfs_fsize_t, new_isize2)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;

		/* file 1 */
		__entry->ino1 = xmi->xmi_ip1->i_ino;
		__entry->startoff1 = xmi->xmi_startoff1;
		__entry->isize1 = xmi->xmi_ip1->i_disk_size;
		__entry->new_isize1 = xmi->xmi_isize1;

		/* file 2 */
		__entry->ino2 = xmi->xmi_ip2->i_ino;
		__entry->startoff2 = xmi->xmi_startoff2;
		__entry->isize2 = xmi->xmi_ip2->i_disk_size;
		__entry->new_isize2 = xmi->xmi_isize2;

		/* whole-intent values */
		__entry->blockcount = xmi->xmi_blockcount;
		__entry->flags = xmi->xmi_flags;
	),
	TP_printk(" dev %d:%d ino1 0x%llx fileoff1 0x%llx ino2 0x%llx fileoff2 0x%llx fsbcount 0x%llx flags (%s) isize1 0x%llx newisize1 0x%llx isize2 0x%llx newisize2 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino1, __entry->startoff1,
		  __entry->ino2, __entry->startoff2,
		  __entry->blockcount,
		  __print_flags_u64(__entry->flags, " | ", XFS_EXCHMAPS_STRINGS),
		  __entry->isize1, __entry->new_isize1,
		  __entry->isize2, __entry->new_isize2)
);
/* Instantiate one named event of xfs_exchmaps_intent_class. */
#define DEFINE_EXCHMAPS_INTENT_EVENT(name) \
DEFINE_EVENT(xfs_exchmaps_intent_class, name, \
	TP_PROTO(struct xfs_mount *mp, const struct xfs_exchmaps_intent *xmi), \
	TP_ARGS(mp, xmi))

DEFINE_EXCHMAPS_INTENT_EVENT(xfs_exchmaps_defer);
DEFINE_EXCHMAPS_INTENT_EVENT(xfs_exchmaps_recover);
/*
 * Record four extent mappings (left, curr, new, right) plus the caller's
 * delta and state values.  Each mapping is stored and printed as the triple
 * br_startoff:br_startblock:br_blockcount, all in hexadecimal.
 */
TRACE_EVENT(xfs_exchmaps_delta_nextents_step,
	TP_PROTO(struct xfs_mount *mp,
		 const struct xfs_bmbt_irec *left,
		 const struct xfs_bmbt_irec *curr,
		 const struct xfs_bmbt_irec *new,
		 const struct xfs_bmbt_irec *right,
		 int delta, unsigned int state),
	TP_ARGS(mp, left, curr, new, right, delta, state),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		/* left mapping */
		__field(xfs_fileoff_t, loff)
		__field(xfs_fsblock_t, lstart)
		__field(xfs_filblks_t, lcount)
		/* current mapping */
		__field(xfs_fileoff_t, coff)
		__field(xfs_fsblock_t, cstart)
		__field(xfs_filblks_t, ccount)
		/* new mapping */
		__field(xfs_fileoff_t, noff)
		__field(xfs_fsblock_t, nstart)
		__field(xfs_filblks_t, ncount)
		/* right mapping */
		__field(xfs_fileoff_t, roff)
		__field(xfs_fsblock_t, rstart)
		__field(xfs_filblks_t, rcount)
		__field(int, delta)
		__field(unsigned int, state)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->delta = delta;
		__entry->state = state;

		__entry->loff = left->br_startoff;
		__entry->lstart = left->br_startblock;
		__entry->lcount = left->br_blockcount;

		__entry->coff = curr->br_startoff;
		__entry->cstart = curr->br_startblock;
		__entry->ccount = curr->br_blockcount;

		__entry->noff = new->br_startoff;
		__entry->nstart = new->br_startblock;
		__entry->ncount = new->br_blockcount;

		__entry->roff = right->br_startoff;
		__entry->rstart = right->br_startblock;
		__entry->rcount = right->br_blockcount;
	),
	TP_printk(" dev %d:%d left 0x%llx:0x%llx:0x%llx; curr 0x%llx:0x%llx:0x%llx <- new 0x%llx:0x%llx:0x%llx; right 0x%llx:0x%llx:0x%llx delta %d state 0x%x ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->loff, __entry->lstart, __entry->lcount,
		  __entry->coff, __entry->cstart, __entry->ccount,
		  __entry->noff, __entry->nstart, __entry->ncount,
		  __entry->roff, __entry->rstart, __entry->rcount,
		  __entry->delta, __entry->state)
);
/*
 * Record, for both inodes of an exchmaps request, the current extent count
 * of the fork selected by xfs_exchmaps_reqfork(req) and the caller-supplied
 * signed deltas (d_nexts1, d_nexts2).
 */
TRACE_EVENT(xfs_exchmaps_delta_nextents,
	TP_PROTO(const struct xfs_exchmaps_req *req, int64_t d_nexts1,
		 int64_t d_nexts2),
	TP_ARGS(req, d_nexts1, d_nexts2),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino1)
		__field(xfs_ino_t, ino2)
		__field(xfs_extnum_t, nexts1)
		__field(xfs_extnum_t, nexts2)
		__field(int64_t, d_nexts1)
		__field(int64_t, d_nexts2)
	),
	TP_fast_assign(
		/* the same fork is examined in both inodes */
		int			fork = xfs_exchmaps_reqfork(req);

		__entry->dev = req->ip1->i_mount->m_super->s_dev;

		__entry->ino1 = req->ip1->i_ino;
		__entry->nexts1 = xfs_ifork_ptr(req->ip1, fork)->if_nextents;
		__entry->d_nexts1 = d_nexts1;

		__entry->ino2 = req->ip2->i_ino;
		__entry->nexts2 = xfs_ifork_ptr(req->ip2, fork)->if_nextents;
		__entry->d_nexts2 = d_nexts2;
	),
	TP_printk(" dev %d:%d ino1 0x%llx nexts %llu ino2 0x%llx nexts %llu delta1 %lld delta2 %lld ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino1, __entry->nexts1,
		  __entry->ino2, __entry->nexts2,
		  __entry->d_nexts1, __entry->d_nexts2)
);
2024-04-22 16:47:55 +00:00
/*
 * Tracepoint class for a single getparents record.
 *
 * @ip:		inode being queried
 * @ppi:	getparents request; gp_bufsize is recorded
 * @context:	attr list context; firstu is recorded
 * @pptr:	record being traced; gpr_reclen, the parent fid (inode number
 *		and generation) and gpr_name are recorded
 *
 * The name is captured with __string()/__assign_str() and printed with %s.
 */
DECLARE_EVENT_CLASS(xfs_getparents_rec_class,
	TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi,
		 const struct xfs_attr_list_context *context,
		 const struct xfs_getparents_rec *pptr),
	TP_ARGS(ip, ppi, context, pptr),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, firstu)
		__field(unsigned short, reclen)
		__field(unsigned int, bufsize)
		__field(xfs_ino_t, parent_ino)
		__field(unsigned int, parent_gen)
		__string(name, pptr->gpr_name)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->firstu = context->firstu;
		__entry->reclen = pptr->gpr_reclen;
		__entry->bufsize = ppi->gp_bufsize;
		__entry->parent_ino = pptr->gpr_parent.ha_fid.fid_ino;
		__entry->parent_gen = pptr->gpr_parent.ha_fid.fid_gen;
		__assign_str(name);
	),
	TP_printk(" dev %d:%d ino 0x%llx firstu %u reclen %u bufsize %u parent_ino 0x%llx parent_gen 0x%x name '%s' ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->firstu,
		  __entry->reclen,
		  __entry->bufsize,
		  __entry->parent_ino,
		  __entry->parent_gen,
		  __get_str(name))
)
/* Instantiate one named event of xfs_getparents_rec_class. */
#define DEFINE_XFS_GETPARENTS_REC_EVENT(name) \
DEFINE_EVENT(xfs_getparents_rec_class, name, \
	TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \
		 const struct xfs_attr_list_context *context, \
		 const struct xfs_getparents_rec *pptr), \
	TP_ARGS(ip, ppi, context, pptr))

DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_put_listent);
DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_expand_lastrec);
/*
 * Tracepoint class for a getparents call as a whole.
 *
 * @ip:		inode being queried
 * @ppi:	getparents request; gp_iflags, gp_oflags and gp_bufsize are
 *		recorded
 * @cur:	attr list cursor; its initted, hashval, blkno and offset
 *		members are recorded
 */
DECLARE_EVENT_CLASS(xfs_getparents_class,
	TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi,
		 const struct xfs_attrlist_cursor_kern *cur),
	TP_ARGS(ip, ppi, cur),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned short, iflags)
		__field(unsigned short, oflags)
		__field(unsigned int, bufsize)
		__field(unsigned int, hashval)
		__field(unsigned int, blkno)
		__field(unsigned int, offset)
		__field(int, initted)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;

		/* request header */
		__entry->iflags = ppi->gp_iflags;
		__entry->oflags = ppi->gp_oflags;
		__entry->bufsize = ppi->gp_bufsize;

		/* cursor state */
		__entry->initted = cur->initted;
		__entry->hashval = cur->hashval;
		__entry->blkno = cur->blkno;
		__entry->offset = cur->offset;
	),
	TP_printk(" dev %d:%d ino 0x%llx iflags 0x%x oflags 0x%x bufsize %u cur_init? %d hashval 0x%x blkno %u offset %u ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->iflags,
		  __entry->oflags,
		  __entry->bufsize,
		  __entry->initted,
		  __entry->hashval,
		  __entry->blkno,
		  __entry->offset)
)
/* Instantiate one named event of xfs_getparents_class. */
#define DEFINE_XFS_GETPARENTS_EVENT(name) \
DEFINE_EVENT(xfs_getparents_class, name, \
	TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \
		 const struct xfs_attrlist_cursor_kern *cur), \
	TP_ARGS(ip, ppi, cur))

DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin);
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end);
2024-11-04 04:18:52 +00:00
/*
 * Tracepoint class for a metadir update (@upd).
 *
 * Records the device and inode number of upd->dp, the upd->path string, and
 * the inode number of upd->ip.  When upd->ip is NULL the recorded inode
 * number is NULLFSINO.
 */
DECLARE_EVENT_CLASS(xfs_metadir_update_class,
	TP_PROTO(const struct xfs_metadir_update *upd),
	TP_ARGS(upd),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, dp_ino)
		__field(xfs_ino_t, ino)
		__string(fname, upd->path)
	),
	TP_fast_assign(
		__entry->dev = upd->dp->i_mount->m_super->s_dev;
		__entry->dp_ino = upd->dp->i_ino;
		if (upd->ip)
			__entry->ino = upd->ip->i_ino;
		else
			__entry->ino = NULLFSINO;
		__assign_str(fname);
	),
	TP_printk(" dev %d:%d dp 0x%llx fname '%s' ino 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dp_ino,
		  __get_str(fname),
		  __entry->ino)
)
/* Instantiate one named event of xfs_metadir_update_class. */
#define DEFINE_METADIR_UPDATE_EVENT(name) \
DEFINE_EVENT(xfs_metadir_update_class, name, \
	TP_PROTO(const struct xfs_metadir_update *upd), \
	TP_ARGS(upd))

DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_create);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_link);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_commit);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_cancel);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_try_create);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_create);
DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_link);
DECLARE_EVENT_CLASS ( xfs_metadir_update_error_class ,
TP_PROTO ( const struct xfs_metadir_update * upd , int error ) ,
TP_ARGS ( upd , error ) ,
TP_STRUCT__entry (
__field ( dev_t , dev )
__field ( xfs_ino_t , dp_ino )
__field ( xfs_ino_t , ino )
__field ( int , error )
__string ( fname , upd - > path )
) ,
TP_fast_assign (
__entry - > dev = upd - > dp - > i_mount - > m_super - > s_dev ;
__entry - > dp_ino = upd - > dp - > i_ino ;
__entry - > ino = upd - > ip ? upd - > ip - > i_ino : NULLFSINO ;
__entry - > error = error ;
__assign_str ( fname ) ;
) ,
TP_printk ( " dev %d:%d dp 0x%llx fname '%s' ino 0x%llx error %d " ,
MAJOR ( __entry - > dev ) , MINOR ( __entry - > dev ) ,
__entry - > dp_ino ,
__get_str ( fname ) ,
__entry - > ino ,
__entry - > error )
)
# define DEFINE_METADIR_UPDATE_ERROR_EVENT(name) \
DEFINE_EVENT ( xfs_metadir_update_error_class , name , \
TP_PROTO ( const struct xfs_metadir_update * upd , int error ) , \
TP_ARGS ( upd , error ) )
DEFINE_METADIR_UPDATE_ERROR_EVENT ( xfs_metadir_teardown ) ;
/*
 * Tracepoint class for a metadir operation on a named entry.
 *
 * @dp:		directory inode; supplies the device and "dir" inode number
 * @name:	entry name; its type is decoded with XFS_DIR3_FTYPE_STR
 * @ino:	inode number associated with the entry
 *
 * Exactly name->len bytes of the name are copied into the record and no
 * terminator is written, so the name is printed with '%.*s' bounded by the
 * recorded namelen.
 */
DECLARE_EVENT_CLASS(xfs_metadir_class,
	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name,
		 xfs_ino_t ino),
	TP_ARGS(dp, name, ino),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, dp_ino)
		__field(xfs_ino_t, ino)
		__field(int, ftype)
		__field(int, namelen)
		__dynamic_array(char, name, name->len)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(dp)->i_sb->s_dev;
		__entry->dp_ino = dp->i_ino;
		__entry->ino = ino;
		__entry->ftype = name->type;
		__entry->namelen = name->len;
		memcpy(__get_str(name), name->name, name->len);
	),
	TP_printk(" dev %d:%d dir 0x%llx type %s name '%.*s' ino 0x%llx ",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dp_ino,
		  __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR),
		  __entry->namelen,
		  __get_str(name),
		  __entry->ino)
)
/* Instantiate one named event of xfs_metadir_class. */
#define DEFINE_METADIR_EVENT(name) \
DEFINE_EVENT(xfs_metadir_class, name, \
	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, \
		 xfs_ino_t ino), \
	TP_ARGS(dp, name, ino))

DEFINE_METADIR_EVENT(xfs_metadir_lookup);
2009-12-14 23:14:59 +00:00
# endif /* _TRACE_XFS_H */

/*
 * The directives below sit after the #endif that closes the _TRACE_XFS_H
 * guard.  They set the include path to the current directory and the file
 * name to xfs_trace, and then include <trace/define_trace.h>, which consumes
 * those two settings.
 */
# undef TRACE_INCLUDE_PATH
# define TRACE_INCLUDE_PATH .
# define TRACE_INCLUDE_FILE xfs_trace
# include <trace/define_trace.h>