mirror of
https://github.com/Dasharo/systemd.git
synced 2026-03-06 15:02:31 -08:00
journalctl: find boot ID more gracefully in corrupted journal
In discover_next_boot(), first we find a new boot ID based on the value stored in the entry object. Then, find the tail (or head when we are going upwards) entry of the boot based on the _BOOT_ID= field data. If boot IDs of an entry in the entry object and _BOOT_ID field data are inconsistent, which may happen on corrupted journal, then previously discover_next_boot() failed with -ENODATA. This makes the function check if the two boot IDs in each entry are consistent, and skip the entry if not. Fixes the failure of `journalctl -b -1` for 'truncated' journal: https://github.com/systemd/systemd/pull/29334#issuecomment-1736567951
This commit is contained in:
@@ -1836,7 +1836,7 @@ static int discover_next_boot(
|
||||
bool advance_older,
|
||||
BootId *ret) {
|
||||
|
||||
BootId boot;
|
||||
_cleanup_set_free_ Set *broken_ids = NULL;
|
||||
int r;
|
||||
|
||||
assert(j);
|
||||
@@ -1853,7 +1853,10 @@ static int discover_next_boot(
|
||||
* we can actually advance to a *different* boot. */
|
||||
sd_journal_flush_matches(j);
|
||||
|
||||
do {
|
||||
for (;;) {
|
||||
sd_id128_t *id_dup;
|
||||
BootId boot;
|
||||
|
||||
r = sd_journal_step_one(j, !advance_older);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@@ -1874,38 +1877,106 @@ static int discover_next_boot(
|
||||
* speed things up, but let's not trust that it is complete, and hence, manually advance as
|
||||
* necessary. */
|
||||
|
||||
} while (sd_id128_equal(boot.id, previous_boot_id));
|
||||
if (!sd_id128_is_null(previous_boot_id) && sd_id128_equal(boot.id, previous_boot_id))
|
||||
continue;
|
||||
|
||||
r = sd_journal_get_realtime_usec(j, &boot.first_usec);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (set_contains(broken_ids, &boot.id))
|
||||
continue;
|
||||
|
||||
/* Now seek to the last occurrence of this boot ID. */
|
||||
r = add_match_boot_id(j, boot.id);
|
||||
if (r < 0)
|
||||
return r;
|
||||
/* Yay, we found a new boot ID from the entry object. Let's check there exist corresponding
|
||||
* entries matching with the _BOOT_ID= data. */
|
||||
|
||||
if (advance_older)
|
||||
r = sd_journal_seek_head(j);
|
||||
else
|
||||
r = sd_journal_seek_tail(j);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = add_match_boot_id(j, boot.id);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_journal_step_one(j, advance_older);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
|
||||
"Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. */
|
||||
/* First, seek to the first (or the last when we are going upwards) occurrence of this boot ID.
|
||||
* You may think this is redundant. Yes, that's redundant unless the journal is corrupted.
|
||||
* But when the journal is corrupted, especially, badly 'truncated', then the below may fail.
|
||||
* See https://github.com/systemd/systemd/pull/29334#issuecomment-1736567951. */
|
||||
if (advance_older)
|
||||
r = sd_journal_seek_tail(j);
|
||||
else
|
||||
r = sd_journal_seek_head(j);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_journal_get_realtime_usec(j, &boot.last_usec);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = sd_journal_step_one(j, 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) {
|
||||
log_debug("Whoopsie! We found a boot ID %s but can't read its first entry. "
|
||||
"The journal seems to be corrupted. Ignoring the boot ID.",
|
||||
SD_ID128_TO_STRING(boot.id));
|
||||
goto try_again;
|
||||
}
|
||||
|
||||
sd_journal_flush_matches(j);
|
||||
*ret = boot;
|
||||
return 1;
|
||||
r = sd_journal_get_realtime_usec(j, &boot.first_usec);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Next, seek to the last occurrence of this boot ID. */
|
||||
if (advance_older)
|
||||
r = sd_journal_seek_head(j);
|
||||
else
|
||||
r = sd_journal_seek_tail(j);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_journal_step_one(j, 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) {
|
||||
log_debug("Whoopsie! We found a boot ID %s but can't read its last entry. "
|
||||
"The journal seems to be corrupted. Ignoring the boot ID.",
|
||||
SD_ID128_TO_STRING(boot.id));
|
||||
goto try_again;
|
||||
}
|
||||
|
||||
r = sd_journal_get_realtime_usec(j, &boot.last_usec);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
sd_journal_flush_matches(j);
|
||||
*ret = boot;
|
||||
return 1;
|
||||
|
||||
try_again:
|
||||
/* Save the bad boot ID. */
|
||||
id_dup = newdup(sd_id128_t, &boot.id, 1);
|
||||
if (!id_dup)
|
||||
return -ENOMEM;
|
||||
|
||||
r = set_ensure_consume(&broken_ids, &id128_hash_ops_free, id_dup);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Move to the previous position again. */
|
||||
sd_journal_flush_matches(j);
|
||||
|
||||
if (!sd_id128_is_null(previous_boot_id)) {
|
||||
r = add_match_boot_id(j, previous_boot_id);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (advance_older)
|
||||
r = sd_journal_seek_head(j);
|
||||
else
|
||||
r = sd_journal_seek_tail(j);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_journal_step_one(j, 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
|
||||
"Whoopsie! Cannot seek to the last entry of boot %s.",
|
||||
SD_ID128_TO_STRING(previous_boot_id));
|
||||
|
||||
sd_journal_flush_matches(j);
|
||||
}
|
||||
}
|
||||
|
||||
int journal_find_boot_by_id(sd_journal *j, sd_id128_t boot_id) {
|
||||
|
||||
@@ -233,6 +233,6 @@ done < <(find /test-journals/no-rtc -name "*.zst")
|
||||
|
||||
journalctl --directory="$JOURNAL_DIR" --list-boots --output=json >/tmp/lb1
|
||||
diff -u /tmp/lb1 - <<'EOF'
|
||||
[{"index":-3,"boot_id":"5ea5fc4f82a14186b5332a788ef9435e","first_entry":1666569600994371,"last_entry":1666584266223608},{"index":-2,"boot_id":"bea6864f21ad4c9594c04a99d89948b0","first_entry":1666584266731785,"last_entry":1666584347230411},{"index":-1,"boot_id":"4c708e1fd0744336be16f3931aa861fb","first_entry":1666584348378271,"last_entry":1666584354649355},{"index":0,"boot_id":"35e8501129134edd9df5267c49f744a4","first_entry":1666584356661527,"last_entry":1666584438086856}]
|
||||
[{"index":-3,"boot_id":"5ea5fc4f82a14186b5332a788ef9435e","first_entry":1666569600994371,"last_entry":1666584266223608},{"index":-2,"boot_id":"bea6864f21ad4c9594c04a99d89948b0","first_entry":1666569601005945,"last_entry":1666584347230411},{"index":-1,"boot_id":"4c708e1fd0744336be16f3931aa861fb","first_entry":1666569601017222,"last_entry":1666584354649355},{"index":0,"boot_id":"35e8501129134edd9df5267c49f744a4","first_entry":1666569601009823,"last_entry":1666584438086856}]
|
||||
EOF
|
||||
rm -rf "$JOURNAL_DIR" /tmp/lb1
|
||||
|
||||
Reference in New Issue
Block a user