Merge pull request #22125 from DaanDeMeyer/copy-holes

shared: Copy holes in sparse files in copy_bytes_full()
This commit is contained in:
Luca Boccassi
2022-01-17 21:15:14 +00:00
committed by GitHub
4 changed files with 86 additions and 1 deletions

View File

@@ -179,7 +179,7 @@ static void journald_file_set_offline_internal(JournaldFile *f) {
log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path);
r = copy_file_atomic(f->file->path, f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC);
r = copy_file_atomic(f->file->path, f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC | COPY_HOLES);
if (r < 0) {
log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path);
continue;

View File

@@ -202,6 +202,46 @@ int copy_bytes_full(
if (max_bytes != UINT64_MAX && m > max_bytes)
m = max_bytes;
if (copy_flags & COPY_HOLES) {
off_t c, e;
c = lseek(fdf, 0, SEEK_CUR);
if (c < 0)
return -errno;
/* To see if we're in a hole, we search for the next data offset. */
e = lseek(fdf, c, SEEK_DATA);
if (e < 0 && errno == ENXIO)
/* If errno == ENXIO, that means we've reached the final hole of the file and
* that hole isn't followed by more data. */
e = lseek(fdf, 0, SEEK_END);
if (e < 0)
return -errno;
/* If we're in a hole (current offset is not a data offset), create a hole of the
* same size in the target file. */
if (e > c && lseek(fdt, e - c, SEEK_CUR) < 0)
return -errno;
c = e; /* Set c to the start of the data segment. */
/* After copying a potential hole, find the end of the data segment by looking for
* the next hole. If we get ENXIO, we're at EOF. */
e = lseek(fdf, c, SEEK_HOLE);
if (e < 0) {
if (errno == ENXIO)
break;
return -errno;
}
/* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */
if (lseek(fdf, c, SEEK_SET) < 0)
return -errno;
/* Make sure we're not copying more than the current data segment. */
m = MIN(m, (size_t) e - c);
}
/* First try copy_file_range(), unless we already tried */
if (try_cfr) {
n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);

View File

@@ -24,6 +24,7 @@ typedef enum CopyFlags {
COPY_FSYNC_FULL = 1 << 11, /* fsync_full() after we are done */
COPY_SYNCFS = 1 << 12, /* syncfs() the *top-level* dir after we are done */
COPY_ALL_XATTRS = 1 << 13, /* Preserve all xattrs when copying, not just those in the user namespace */
COPY_HOLES = 1 << 14, /* Copy holes */
} CopyFlags;
typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);

View File

@@ -323,4 +323,48 @@ TEST(copy_proc) {
assert_se(!isempty(a));
}
/* Checks that copy_bytes() with COPY_HOLES reproduces a leading hole of a sparse file in the copy:
 * the copy must start with a hole, its first data must begin where the source's data begins, and its
 * size must match the source. Returns 0 on success, or the result of log_tests_skipped() if the file
 * system backing /var/tmp rejects hole punching as unsupported. */
TEST_RET(copy_holes) {
        char fn[] = "/var/tmp/test-copy-hole-fd-XXXXXX";
        char fn_copy[] = "/var/tmp/test-copy-hole-fd-XXXXXX";
        const off_t hole_size = 1024 * 1024;
        struct stat st;
        int r, fd, fd_copy;

        fd = mkostemp_safe(fn);
        assert_se(fd >= 0);

        /* Check the descriptor that was just created, not the first one again. */
        fd_copy = mkostemp_safe(fn_copy);
        assert_se(fd_copy >= 0);

        /* Punching a one-byte hole serves as a probe for hole support on this file system. */
        r = RET_NERRNO(fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 1));
        if (ERRNO_IS_NOT_SUPPORTED(r)) {
                /* Release both descriptors and both temporary files before skipping, so that a skipped
                 * run does not leave anything behind in /var/tmp. */
                close(fd);
                close(fd_copy);
                unlink(fn);
                unlink(fn_copy);
                return log_tests_skipped("Filesystem doesn't support hole punching");
        }
        assert_se(r >= 0);

        /* We need to make sure to create a large enough hole and to write some data after it, otherwise
         * filesystems (btrfs) might silently discard it. */
        assert_se(lseek(fd, hole_size, SEEK_CUR) >= 0);
        /* Require the full payload to be written; a short write would make the size check below fail
         * for a reason unrelated to hole copying. */
        assert_se(write(fd, "abc", strlen("abc")) == (ssize_t) strlen("abc"));
        assert_se(lseek(fd, 0, SEEK_SET) >= 0);

        assert_se(copy_bytes(fd, fd_copy, UINT64_MAX, COPY_HOLES) >= 0);

        /* Test that the hole starts at the beginning of the file. */
        assert_se(lseek(fd_copy, 0, SEEK_HOLE) == 0);
        /* Test that the hole has the expected size. */
        assert_se(lseek(fd_copy, 0, SEEK_DATA) == hole_size);

        /* Test that the copied file has the correct size. */
        assert_se(fstat(fd_copy, &st) >= 0);
        assert_se(st.st_size == hole_size + (off_t) strlen("abc"));

        close(fd);
        close(fd_copy);

        unlink(fn);
        unlink(fn_copy);

        return 0;
}
/* NOTE(review): DEFINE_TEST_MAIN is defined outside this view; presumably it emits the program's main()
 * that runs the TEST()/TEST_RET() cases with LOG_DEBUG as the log level — confirm against its definition. */
DEFINE_TEST_MAIN(LOG_DEBUG);