[root@localhost caq]# xfs_db -c frag -r /dev/sdaw
actual , ideal , fragmentation factor 82.56%
Note, this number is largely meaningless.
Files on this filesystem average 5.73 extents per file
[root@localhost caq]# xfs_fsr /dev/sdaw
/mnt/K4HUWARB start inode=
[root@localhost caq]#
[root@localhost caq]#
[root@localhost caq]# xfs_db -c frag -r /dev/sdaw
actual , ideal , fragmentation factor 14.79%
Note, this number is largely meaningless.
Files on this filesystem average 1.17 extents per file
整理前碎片率为82.56%,整理后降到14.79%,平均每个文件的extent数也从5.73降到1.17。一般来说,碎片率高于45%就建议整理:一来连续的块可以提高读的性能,二来可以省出很多block。
除了xfs_fsr,还有很多有用的工具。
xfs_admin xfs_db xfs_freeze xfs_info xfs_logprint xfs_mkfile xfs_repair
xfs_bmap xfsdump xfs_fsr xfsinvutil xfs_mdrestore xfs_ncheck xfsrestore
xfs_copy xfs_estimate xfs_growfs xfs_io xfs_metadump xfs_quota xfs_rtcp
用的最多的是xfs_repair,xfs_check,xfs_db.
xfs_info的使用:
[root@localhost caq]# xfs_info /dev/sdaw
meta-data=/dev/sdaw isize= agcount=, agsize= blks
= sectsz= attr=
data = bsize= blocks=, imaxpct=
= sunit= swidth= blks
naming =version bsize= ascii-ci=
log =internal bsize= blocks=, version=
= sectsz= sunit= blks, lazy-count=
realtime =none extsz= blocks=, rtextents=
可以看出,sdaw有4个AG,block个数为 488378646,每个AG管理的block数量为总数量/4, 扇区大小为512,
sunit、swidth是条带参数,供raid使用;swidth=0表示没有配置条带。
该挂载点的xfs的日志记录在内部,有些设备上面既有慢速的机械盘,又有快速的ssd甚至nvme盘,则可以把慢速设备的xfs日志放在nvme上,可以提高性能。
[root@localhost caq]# xfs_info -V
xfs_info version 3.1.
xfs_admin 的使用:
该工具能修改xfs文件系统参数
设置uuid xfs_admin -U uuid /dev/sdc
设置label xfs_admin -L label /dev/sdc
xfs_db工具能打印和修改未mount的xfs文件系统的元数据结构体:
xfs_db /dev/sdc
xfs_db> sb
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 524288
修改:xfs_db -x -c 'sb 0' -c 'write dblocks 0' /dev/sdc
查看文件inode xfs_db> inode 131
xfs_db> p
core.magic = 0x494e
core.mode = 0100644
core.version = 2
打印文件系统剩余空间:
xfs_db> freesp
from to extents blocks pct
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.02
0.13
0.29
0.63
0.76
0.99
1.46
0.22
95.50
打印原始16进制数据:
xfs_db> type text
xfs_db> p
00: 49 4e 81 ed 02 02 00 00 00 00 00 00 00 00 00 00 IN..............
10: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................
20: 54 a2 14 e5 01 bb 2a 29 54 a2 14 e5 02 72 45 2a T.......T....rE.
30: 54 a2 14 e5 02 72 45 2a 00 00 00 00 00 00 22 4c T....rE........L
40: 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 01 ................
…..
xfs对block的管理是分级进行的,从小到大依次是:扇区(sector),块(block),extent,ag。
其中:sector<block<extent<ag
typedef struct xfs_agf {
/*
* Common allocation group header information
*/
__be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */
__be32 agf_versionnum; /* header version == XFS_AGF_VERSION */
__be32 agf_seqno; /* sequence # starting from 0 */
__be32 agf_length; /* size in blocks of a.g. */
/*
* Freespace information
*/
__be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
__be32 agf_spare0; /* spare field */
__be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
__be32 agf_spare1; /* spare field */
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
__be32 agf_flcount; /* count of blocks in freelist */
__be32 agf_freeblks; /* total free blocks */
__be32 agf_longest; /* longest free space */
__be32 agf_btreeblks; /* # of blocks held in AGF btrees */
uuid_t agf_uuid; /* uuid of filesystem */
------------------------------以下字段在2.6.32版本中没看到,3.10对应的xfs模块有,中间版本没对比过
/*
* reserve some contiguous space for future logged fields before we add
* the unlogged fields. This makes the range logging via flags and
* structure offsets much simpler.
*/
__be64 agf_spare64[16];
/* unlogged fields, written during buffer writeback. */
__be64 agf_lsn; /* last write sequence */
__be32 agf_crc; /* crc of agf sector */
__be32 agf_spare2; /* structure must be padded to 64 bit alignment */
} xfs_agf_t;
可以查看一个agf对比下:
[root@localhost caq]# xfs_db /dev/sdaw
xfs_db> agf
xfs_db> p
magicnum = 0x58414746--------------对比agf_magicnum字段
versionnum = 1---------------------对比agf_versionnum字段
seqno = 0--------------------------对比agf_seqno
length = 30523648------------------对比agf_length
bnoroot = 144----------------------agf_roots
cntroot =
bnolevel =
cntlevel =
flfirst =
fllast =
flcount =
freeblks = 29067437----------------空闲的block数
longest = 28900480-----------------最大的空闲段长度
btreeblks =
uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92
lsn = 0xd000484b0
crc = 0x37a41d5b (correct)----------crc字段
查看144这个block的信息:
xfs_db> fsblock
xfs_db> p
: ffffffff ffffffff 0000000d 000484b0
: 6c46ba7a 03b646bc 912c7bac 04aafe92 8e0f24fb
: 0000006a 000006a4 0000005c 000002f9 000049a4 0000005c
: 00006a24 0000005c 00006f24 00001cdc 00008d00
: 00015b00 00000f00 00016a01 0000007f 0001aa80 00002d00
0a0: 00031f80 00043b88
0c0: 0004b704 0000007c 0004f780 0000037c
0e0: 0009a680 000a6a80
: 000aab80 00000c00 000ab880 000b6c80 000c6e80
: 000d7480 000df088 000000f8 000e3184 0000007c 000f7200 00000f80
: 00003f80 00003c00
: 0016c480 0018c480 01b8fc80 0018c480 01b8fc80
: 0018c480 01b8fc80 0018c480 01b8fc80 0018c480 01b8fc80 001a1e00 01b7a300
1a0: 001a2500 01b79c00 001a2b00 01b79600 001a3100 01b79000 001a3100 01b79000
1c0: 001a3900 01b78800 001a3900 01b78800 001a3b00 01b78600 001a4100 01b78000
这样打印不直观,可以像printf一样,先指定类型再格式化打印:
xfs_db> fsblock
xfs_db> type bnobt-----------------指定打印的type类型
xfs_db> p
magic = 0x41423342-----------------这个可以看到,和上面直接打印的magic是一样的,0x41423342对应的ASCII码是‘AB3B’(开启crc的v5格式;未开启crc时为‘ABTB’,即0x41425442)
level = 0--------------------------表示叶子节点,1表示中间节点
numrecs = 40-----------------------本叶子节点中空闲extent记录的个数,就是recs数组中有效的元素个数
leftsib = null
rightsib = null
bno =
lsn = 0xd000484b0
uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92
owner =
crc = 0x8e0f24fb (correct)
recs[-] = [startblock,blockcount] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,] :[,]
查看xfs超级块的一些信息:
[root@localhost /]# xfs_db /dev/sdaw
xfs_db> sb 0-----------------超级块
xfs_db> p
magicnum = 0x58465342--------这个就是XFSB的ASCII码
blocksize = 4096-------------逻辑块大小
dblocks = 976754646----------磁盘总块数,块数*块大小就是磁盘的空间了
rblocks =
rextents =
uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92
logstart =
rootino = 1024---------------根节点inode号
rbmino =
rsumino =
rextsize =
agblocks = 30523648----------每个AG的块数量,
agcount = 32-----------------32个AG;agcount*agblocks=976756736,略大于dblocks,因为最后一个AG不满(只有30521558个块)
rbmblocks =
logblocks = 476930-----------日志块数
versionnum = 0xbda5
sectsize =
inodesize =
inopblock = 8----------------每个block可以存储的inode的个数,4096/512=8
fname = "\000\000\000\000\000\000\000\000\000\000\000\000"--------------文件系统的名称
blocklog = 12----------------2的12次方,这个可以和page的shift类比,
sectlog = 12-----------------sector大小的log表示,
inodelog = 9-----------------inode大小的log表示
inopblog = 3-----------------每个block可以存储的inode的log表示,
agblklog = 25----------------每个ag可以管理的block个数的log表示,这个存在向上取整
rextslog =
inprogress =
imax_pct =
icount =
ifree =
fdblocks =
frextents =
uquotino = null
gquotino = null
qflags =
flags =
shared_vn =
inoalignmt =
unit =
width =
dirblklog =
logsectlog =
logsectsize =
logsunit =
features2 = 0x18a
bad_features2 = 0x18a
features_compat =
features_ro_compat =
features_incompat = 0x1
features_log_incompat =
crc = 0xd0deb599 (correct)
spino_align =
pquotino = null
lsn = 0xd00048528
meta_uuid = ----
xfs的加载过程及常见维护的队列:
xfs内核模块(ko)加载的入口函数是init_xfs_fs,主要申请xfs内核模块使用的内存资源,注册xfs文件系统。
常驻内存工作队列说明:
[xfsalloc] 模块加载产生,用于文件申请extent的时候,调用__xfs_bmapi_allocate,分配extent。
[xfs_mru_cache] 模块加载产生,用于访问MRU (Most Recently Used) Cache的时候使用。
[xfslogd]模块加载产生,xfs_buf的IO结束调用 b_iodone_work的队列。
以下队列/线程在mount阶段产生,此时会对应具体的实例(挂载点,设备号等):
[xfs-data/sdc]mount产生,数据IO的异步操作队列,directIO结束的时候调用io_work。
[xfs-conv/sdc] mount产生,数据IO的异步操作队列,bufIO结束时候调用io_work。
[xfs-cil/sdc] mount产生,push日志的工作队列,用于xfs_trans_commit将事务提交到日志里。
[xfsaild/sdc] mount产生, xfs_log_worker触发,将脏节点push到buf,再将buf下盘,umount的时候也经常调用类似流程,如 xfs_ail_push_all_sync 函数。
参考资料:
http://xfs.org/
https://access.redhat.com/
中兴OS团队的相关GPL文档。