#! /bin/bash
# FSQA Test No. 098
#
# Test that if we fsync a file that got one extent partially cloned into a
# lower file offset, after a power failure our file has the same content it
# had before the power failure and after the extent cloning operation.
#
#-----------------------------------------------------------------------
#
# Copyright (C) 2015 SUSE Linux Products GmbH. All Rights Reserved.
# Author: Filipe Manana
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#-----------------------------------------------------------------------
#

seq=$(basename "$0")
seqres=$RESULT_DIR/$seq
echo "QA output created by $seq"

tmp=/tmp/$$
status=1	# failure is the default!
# The trap string is evaluated when the trap fires, so it picks up the final
# value of $status and may reference _cleanup before it is defined below.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Tear down the flakey device and remove this test's temporary files.
_cleanup()
{
	_cleanup_flakey
	# Only the variable is quoted so that the trailing glob still expands.
	rm -f "$tmp".*
}

# get standard environment, filters and checks
. ./common/rc
. ./common/filter
. ./common/dmflakey

# real QA test starts here
_need_to_be_root
_supported_fs btrfs
_supported_os Linux
_require_scratch
_require_dm_flakey
_require_cloner
_require_metadata_journaling "$SCRATCH_DEV"

rm -f "$seqres.full"

_scratch_mkfs >>"$seqres.full" 2>&1
_init_flakey
_mount_flakey

# Create our test file with a single 100K extent starting at file offset 800K.
# We fsync the file here to make sure the fsync log tree gets a single csum
# item that covers the whole 100K extent, which causes the second fsync, done
# after the cloning operation below, to not leave in the log tree two csum
# items covering two sub-ranges ([0, 20K[ and [20K, 100K[) of our extent.
#
# Note: $XFS_IO_PROG and $CLONER_PROG are intentionally left unquoted, the
# test harness may store extra command line options in them.
$XFS_IO_PROG -f -c "pwrite -S 0xaa 800K 100K" \
		-c "fsync" \
		"$SCRATCH_MNT/foo" | _filter_xfs_io

# Now clone part of our extent into file offset 400K. This adds a file extent
# item to our inode's metadata that points to the 100K extent we created before,
# using a data offset of 20K and a data length of 20K, so that it refers to
# the sub-range [20K, 40K[ of our original extent.
$CLONER_PROG -s $((800 * 1024 + 20 * 1024)) -d $((400 * 1024)) \
	-l $((20 * 1024)) "$SCRATCH_MNT/foo" "$SCRATCH_MNT/foo"

# Now fsync our file to make sure the extent cloning is durably persisted. This
# fsync will not add a second csum item to the log tree containing the checksums
# for the blocks in the sub-range [20K, 40K[ of our extent, because there was
# already a csum item in the log tree covering the whole extent, added by the
# first fsync we did before.
$XFS_IO_PROG -c "fsync" "$SCRATCH_MNT/foo"

echo "File digest before power failure:"
md5sum "$SCRATCH_MNT/foo" | _filter_scratch

# Silently drop all writes and unmount to simulate a crash/power failure.
_load_flakey_table $FLAKEY_DROP_WRITES
_unmount_flakey

# Allow writes again, mount to trigger log replay and validate file contents.
# The fsync log replay first processes the file extent item corresponding to the
# file offset 400K (the one which refers to the [20K, 40K[ sub-range of our 100K
# extent) and then processes the file extent item for file offset 800K. It used
# to happen that when processing the latter, it erroneously left in the csum
# tree 2 csum items that overlapped each other, 1 for the sub-range [20K, 40K[
# and 1 for the whole range of our extent. This introduced a problem where
# subsequent lookups for the checksums of blocks within the range [40K, 100K[
# of our extent would not find anything because lookups in the csum tree ended
# up looking only at the smaller csum item, the one covering the subrange
# [20K, 40K[. This made read requests assume an expected checksum with a value
# of 0 for those blocks, which caused checksum verification failure when the
# read operations finished. However those checksum failures did not result in
# read requests returning an error to user space (like -EIO for e.g.) because
# the expected checksum value had the special value 0, and in that case btrfs
# set all bytes of the corresponding pages with the value 0x01 and produced the
# following warning in dmesg/syslog:
#
#  "BTRFS warning (device dm-0): csum failed ino 257 off 917504 csum 1322675045\
#   expected csum 0"
#
_load_flakey_table $FLAKEY_ALLOW_WRITES
_mount_flakey

echo "File digest after log replay:"
# Must match the same digest it had after cloning the extent and before the
# power failure happened.
md5sum "$SCRATCH_MNT/foo" | _filter_scratch

_unmount_flakey

status=0
exit