Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- root@archiso ~ # uname -r
- 4.16.5-1-ARCH
- root@archiso /mnt # btrfs --version
- btrfs-progs v4.16
- # Set up a 3-device RAID1. In this example all three devices are partitions on the same disk, but that makes no difference.
- root@archiso ~ # mkfs.btrfs --data raid1 --metadata raid1 /dev/vda{1,2,3} -f
- btrfs-progs v4.16
- See http://btrfs.wiki.kernel.org for more information.
- Label: (null)
- UUID: b765aaea-c99c-4950-a347-354b28e6cc50
- Node size: 16384
- Sector size: 4096
- Filesystem size: 30.00GiB
- Block group profiles:
- Data: RAID1 1.00GiB
- Metadata: RAID1 1.00GiB
- System: RAID1 8.00MiB
- SSD detected: no
- Incompat features: extref, skinny-metadata
- Number of devices: 3
- Devices:
- ID SIZE PATH
- 1 10.00GiB /dev/vda1
- 2 10.00GiB /dev/vda2
- 3 10.00GiB /dev/vda3
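- # Mount using default compression. Note this is zlib, not zstd: "-o compress" with no algorithm named selects zlib, and the on-disk extent dumps below begin with the zlib header bytes 78 5e. Lzo has the same behavior, and I'm assuming the remaining algorithm does too.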
- root@archiso ~ # mount -o compress /dev/vda1 /mnt
- root@archiso ~ # cd /mnt
- # Create 1MB file filled with zeros, and by submitting it for defragmentation with compression, force it to be compressed despite being NOCOW and NODATASUM.
- root@archiso /mnt # touch zero
- root@archiso /mnt # chattr +C zero
- root@archiso /mnt # dd if=/dev/zero of=zero bs=1M count=1
- 1+0 records in
- 1+0 records out
- 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.00568061 s, 185 MB/s
- root@archiso /mnt # sync
- root@archiso /mnt # filefrag -v zero
- Filesystem type is: 9123683e
- File size of zero is 1048576 (256 blocks of 4096 bytes)
- ext: logical_offset: physical_offset: length: expected: flags:
- 0: 0.. 255: 269568.. 269823: 256: last,eof
- zero: 1 extent found
- root@archiso /mnt # btrfs fi def -c zero
- # Confirm the file is now stored compressed. Note filefrag correctly shows the ending logical offsets, but it only understands a single length field, which is the uncompressed length, so it shows a much later ending physical offset than the data really occupies. It just doesn't understand btrfs compression. Note the starting physical offsets are consecutive: the 1MB of zeros has been compressed into 8 extents of 4k each (32k total), proving the file was forced to be compressed.
- root@archiso /mnt # filefrag -v zero
- Filesystem type is: 9123683e
- File size of zero is 1048576 (256 blocks of 4096 bytes)
- ext: logical_offset: physical_offset: length: expected: flags:
- 0: 0.. 31: 269952.. 269983: 32: encoded
- 1: 32.. 63: 269953.. 269984: 32: 269984: encoded
- 2: 64.. 95: 269954.. 269985: 32: 269985: encoded
- 3: 96.. 127: 269955.. 269986: 32: 269986: encoded
- 4: 128.. 159: 269956.. 269987: 32: 269987: encoded
- 5: 160.. 191: 269957.. 269988: 32: 269988: encoded
- 6: 192.. 223: 269958.. 269989: 32: 269989: encoded
- 7: 224.. 255: 269959.. 269990: 32: 269990: last,encoded,eof
- zero: 8 extents found
- # Verify the RAID is fine, and note from the data_bytes_scrubbed counts that the file is mirrored on devices 1 & 2 (device 3 holds no data bytes).
- root@archiso /mnt # btrfs scrub start -BdR /mnt
- scrub device /dev/vda1 (id 1) done
- scrub started at Thu May 17 08:53:54 2018 and finished after 00:00:00
- data_extents_scrubbed: 24
- tree_extents_scrubbed: 7
- data_bytes_scrubbed: 1572864
- tree_bytes_scrubbed: 114688
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 384
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 2169503744
- scrub device /dev/vda2 (id 2) done
- scrub started at Thu May 17 08:53:54 2018 and finished after 00:00:00
- data_extents_scrubbed: 24
- tree_extents_scrubbed: 1
- data_bytes_scrubbed: 1572864
- tree_bytes_scrubbed: 16384
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 384
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1083179008
- scrub device /dev/vda3 (id 3) done
- scrub started at Thu May 17 08:53:54 2018 and finished after 00:00:00
- data_extents_scrubbed: 0
- tree_extents_scrubbed: 8
- data_bytes_scrubbed: 0
- tree_bytes_scrubbed: 131072
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 0
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1083179008
- # Calculate the btrfs logical byte address of the first file extent, which filefrag reports as starting at offset 269952 in units of 4k blocks.
- root@archiso /mnt # echo $[269952*4096]
- 1105723392
- # Read both mirrored copies of the on-disk data (so, in compressed form) for the first file extent. Note the last argument to btrfs-map-logical is a device in the volume, not a mountpoint, so this can be done offline. That device does NOT select which mirrored copy is written to the output file; "-c" does that.
- root@archiso /mnt # btrfs-map-logical -l 1105723392 -b 4096 -o /root/1105723392.begin.copy1 -c 1 /dev/vda1
- mirror 1 logical 1105723392 physical 11010048 device /dev/vda2
- mirror 2 logical 1105723392 physical 1097334784 device /dev/vda1
- root@archiso /mnt # btrfs-map-logical -l 1105723392 -b 4096 -o /root/1105723392.begin.copy2 -c 2 /dev/vda1
- mirror 1 logical 1105723392 physical 11010048 device /dev/vda2
- mirror 2 logical 1105723392 physical 1097334784 device /dev/vda1
- # As expected, the mirrored copies are the same. Take a brief look at the compressed form.
- root@archiso /mnt # diff --brief /root/1105723392.begin.copy*
- root@archiso /mnt # xxd /root/1105723392.begin.copy1 | head -n 5
- 00000000: 785e ecd0 010d 0000 00c2 a0f7 4f6d 0f07 x^..........Om..
- 00000010: 1128 0c18 3060 c080 0103 060c 1830 60c0 .(..0`.......0`.
- 00000020: 8081 f781 0100 00ff ffec d081 0c00 0000 ................
- 00000030: c020 7feb 7b7c 2114 9f01 0306 0c18 3060 . ..{|!.......0`
- 00000040: c080 0103 060c 1830 f01f 0800 00ff ffec .......0........
- root@archiso /mnt # xxd /root/1105723392.begin.copy2 | head -n 5
- 00000000: 785e ecd0 010d 0000 00c2 a0f7 4f6d 0f07 x^..........Om..
- 00000010: 1128 0c18 3060 c080 0103 060c 1830 60c0 .(..0`.......0`.
- 00000020: 8081 f781 0100 00ff ffec d081 0c00 0000 ................
- 00000030: c020 7feb 7b7c 2114 9f01 0306 0c18 3060 . ..{|!.......0`
- 00000040: c080 0103 060c 1830 f01f 0800 00ff ffec .......0........
- # Simulate device 1 failure and replacement. Shouldn't have data loss as long as we get the replacement in without another drive failing.
- root@archiso /mnt # cd
- root@archiso ~ # umount /mnt
- root@archiso ~ # dd if=/dev/zero of=/dev/vda1 bs=1M
- dd: error writing '/dev/vda1': No space left on device
- 10241+0 records in
- 10240+0 records out
- 10737418240 bytes (11 GB, 10 GiB) copied, 22.4585 s, 478 MB/s
- dd if=/dev/zero of=/dev/vda1 bs=1M 0.01s user 6.50s system 28% cpu 22.460 total
- 1 root@archiso ~ # sync :(
- # Make sure check passes. Free space cache warnings are documented as safe to ignore, and btrfs easily sidesteps the problem and fixes the cache. They usually indicate that something weird has happened, which is the case here: we had a complete drive failure.
- root@archiso ~ # btrfs check /dev/vda2
- Checking filesystem on /dev/vda2
- UUID: b765aaea-c99c-4950-a347-354b28e6cc50
- checking extents
- checking free space cache
- failed to load free space cache for block group 30408704
- failed to load free space cache for block group 1104150528
- checking fs roots
- checking csums
- checking root refs
- found 688128 bytes used, no error found
- total csum bytes: 0
- total tree bytes: 131072
- total fs tree bytes: 32768
- total extent tree bytes: 16384
- btree space waste bytes: 121695
- file data blocks allocated: 557056
- referenced 1572864
- # Mount the volume degraded. Mounting with or without compression makes no difference to how replace will misbehave.
- root@archiso ~ # mount -o compress,degraded /dev/vda2 /mnt
- # Make sure scrub passes. (Not that there are any files with checksums.) Note all data bytes are on device 2, since device 1 is missing.
- root@archiso ~ # btrfs scrub start -BdR /mnt
- WARNING: device 1 not present
- scrub device /dev/vda1 (id 1) canceled
- scrub started at Thu May 17 08:56:59 2018 and was aborted after 00:00:00
- data_extents_scrubbed: 0
- tree_extents_scrubbed: 0
- data_bytes_scrubbed: 0
- tree_bytes_scrubbed: 0
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 0
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 0
- scrub device /dev/vda2 (id 2) done
- scrub started at Thu May 17 08:56:59 2018 and finished after 00:00:00
- data_extents_scrubbed: 16
- tree_extents_scrubbed: 1
- data_bytes_scrubbed: 557056
- tree_bytes_scrubbed: 16384
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 136
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1385168896
- scrub device /dev/vda3 (id 3) done
- scrub started at Thu May 17 08:56:59 2018 and finished after 00:00:00
- data_extents_scrubbed: 0
- tree_extents_scrubbed: 8
- data_bytes_scrubbed: 0
- tree_bytes_scrubbed: 131072
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 0
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1385168896
- # Check our file's integrity is intact in uncompressed form.
- root@archiso ~ # xxd /mnt/zero | head -n 5
- 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000010: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000020: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000030: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000040: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- # Check our file's integrity is intact in its on-disk (compressed) form. Note copy 1 (mirror 1) is on device 2, just as it was before the failure; copy 2 lived on the now-missing device 1, which is why its device is reported as (null).
- root@archiso ~ # btrfs-map-logical -l 1105723392 -b 4096 -o /root/1105723392.degraded.copy1 -c 1 /dev/vda2
- mirror 1 logical 1105723392 physical 11010048 device /dev/vda2
- mirror 2 logical 1105723392 physical 1097334784 device (null)
- root@archiso ~ # xxd /root/1105723392.degraded.copy1| head -n 5
- 00000000: 785e ecd0 010d 0000 00c2 a0f7 4f6d 0f07 x^..........Om..
- 00000010: 1128 0c18 3060 c080 0103 060c 1830 60c0 .(..0`.......0`.
- 00000020: 8081 f781 0100 00ff ffec d081 0c00 0000 ................
- 00000030: c020 7feb 7b7c 2114 9f01 0306 0c18 3060 . ..{|!.......0`
- 00000040: c080 0103 060c 1830 f01f 0800 00ff ffec .......0........
- root@archiso ~ # diff --brief /root/1105723392.{begin,degraded}.copy1
- # Check our file's integrity, in that it can still be read and uncompressed.
- root@archiso ~ # cat /mnt/zero > /dev/null
- # Replace the (missing) device 1 with our new device 1.
- root@archiso ~ # btrfs replace start -B 1 /dev/vda1 /mnt
- root@archiso ~ # btrfs replace status /mnt
- Started on 17.May 08:58:14, finished on 17.May 08:58:14, 0 write errs, 0 uncorr. read errs
- # Unmount the volume and make sure btrfs check passes
- root@archiso ~ # umount /mnt
- root@archiso ~ # btrfs check /dev/vda1
- Checking filesystem on /dev/vda1
- UUID: b765aaea-c99c-4950-a347-354b28e6cc50
- checking extents
- checking free space cache
- checking fs roots
- checking csums
- checking root refs
- found 753664 bytes used, no error found
- total csum bytes: 0
- total tree bytes: 131072
- total fs tree bytes: 32768
- total extent tree bytes: 16384
- btree space waste bytes: 121191
- file data blocks allocated: 622592
- referenced 1638400
- # Mount the volume.
- root@archiso ~ # mount -o compress /dev/vda1 /mnt
- # Make sure scrub passes. (Still no files with checksums.) Note data bytes are now on devices 1 and 2. Also note no_csum used to be 136 and is now 152, there's an extra data_extent, 2 extra tree_extents, extra data and tree bytes, and last physical is later than before our drive failure. This surprises me, but I have no idea if it's important or somehow expected.
- root@archiso ~ # btrfs scrub start -BdR /mnt
- scrub device /dev/vda1 (id 1) done
- scrub started at Thu May 17 09:00:29 2018 and finished after 00:00:00
- data_extents_scrubbed: 17
- tree_extents_scrubbed: 7
- data_bytes_scrubbed: 622592
- tree_bytes_scrubbed: 114688
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 152
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 2169503744
- scrub device /dev/vda2 (id 2) done
- scrub started at Thu May 17 09:00:29 2018 and finished after 00:00:00
- data_extents_scrubbed: 17
- tree_extents_scrubbed: 1
- data_bytes_scrubbed: 622592
- tree_bytes_scrubbed: 16384
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 152
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1418723328
- scrub device /dev/vda3 (id 3) done
- scrub started at Thu May 17 09:00:29 2018 and finished after 00:00:00
- data_extents_scrubbed: 0
- tree_extents_scrubbed: 8
- data_bytes_scrubbed: 0
- tree_bytes_scrubbed: 131072
- read_errors: 0
- csum_errors: 0
- verify_errors: 0
- no_csum: 0
- csum_discards: 0
- super_errors: 0
- malloc_errors: 0
- uncorrectable_errors: 0
- unverified_errors: 0
- corrected_errors: 0
- last_physical: 1418723328
- # Go straight to checking our file's integrity by checking its on-disk (compressed) form. Note copy 1 is on device 2, and copy 2 is on device 1 (the replacement drive), the same mirror-to-device mapping as before the failure.
- # Oh, no! These shouldn't differ! We have mirrored copies that are different, and the data that is actually read off the disk will depend on which device btrfs goes to.
- root@archiso ~ # btrfs-map-logical -l 1105723392 -b 4096 -o /root/1105723392.replaced.copy1 -c 1 /dev/vda1
- mirror 1 logical 1105723392 physical 11010048 device /dev/vda2
- mirror 2 logical 1105723392 physical 1097334784 device /dev/vda1
- root@archiso ~ # btrfs-map-logical -l 1105723392 -b 4096 -o /root/1105723392.replaced.copy2 -c 2 /dev/vda1
- mirror 1 logical 1105723392 physical 11010048 device /dev/vda2
- mirror 2 logical 1105723392 physical 1097334784 device /dev/vda1
- root@archiso ~ # diff --brief /root/1105723392.replaced.copy*
- Files /root/1105723392.replaced.copy1 and /root/1105723392.replaced.copy2 differ
- # Copy 1, the one on device 2, is correct. But copy 2, on device 1 (the drive we just added as a replacement), contains uncompressed data. If the file is filled with all 1 bits at the beginning instead of 0s, copy 2 shows all 'FF' bytes here.
- 1 root@archiso ~ # xxd /root/1105723392.replaced.copy1 | head -n 5 :(
- 00000000: 785e ecd0 010d 0000 00c2 a0f7 4f6d 0f07 x^..........Om..
- 00000010: 1128 0c18 3060 c080 0103 060c 1830 60c0 .(..0`.......0`.
- 00000020: 8081 f781 0100 00ff ffec d081 0c00 0000 ................
- 00000030: c020 7feb 7b7c 2114 9f01 0306 0c18 3060 . ..{|!.......0`
- 00000040: c080 0103 060c 1830 f01f 0800 00ff ffec .......0........
- root@archiso ~ # xxd /root/1105723392.replaced.copy2 | head -n 5
- 00000000: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000010: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000020: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000030: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- 00000040: 0000 0000 0000 0000 0000 0000 0000 0000 ................
- # Reading and decompressing the file now has undefined behavior. If btrfs reads from the disk holding the compressed data, mirror 1 (device 2), as it did in this example, the read succeeds and the user is left with hidden file corruption.
- root@archiso ~ # cat /mnt/zero
- root@archiso ~ # umount /mnt
- # To force it to go to the disk with uncompressed data, let's simulate a failure on device 2, so the only mirror it has left is on device 1.
- root@archiso ~ # dd if=/dev/zero of=/dev/vda2 bs=1M
- dd: error writing '/dev/vda2': No space left on device
- 10241+0 records in
- 10240+0 records out
- 10737418240 bytes (11 GB, 10 GiB) copied, 21.3278 s, 503 MB/s
- 1 root@archiso ~ # mount -o compress,degraded /dev/vda1 /mnt :(
- # What happens here depends on the already-uncompressed data and how the decompression algorithm handles it. All zeros, as in this example, seem to always give an Input/output error. Other data has been seen to cause random kernel memory corruption, likely bringing the system down with it, without a helpful oops or stack trace pointing to the culprit.
- root@archiso ~ # cat /mnt/zero
- cat: /mnt/zero: Input/output error
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement