diff --git a/docs/JOURNAL_FILE_FORMAT.md b/docs/JOURNAL_FILE_FORMAT.md index 5f7f97c1b8..2d0debd858 100644 --- a/docs/JOURNAL_FILE_FORMAT.md +++ b/docs/JOURNAL_FILE_FORMAT.md @@ -177,6 +177,9 @@ _packed_ struct Header { /* Added in 246 */ le64_t data_hash_chain_depth; le64_t field_hash_chain_depth; + /* Added in 252 */ + le32_t tail_entry_array_offset; + le32_t tail_entry_array_n_entries; }; ``` @@ -231,6 +234,8 @@ became too frequent. Similar, **field_hash_chain_depth** is a counter of the deepest chain in the field hash table, minus one. +**tail_entry_array_offset** and **tail_entry_array_n_entries** allow immediate +access to the last entry array in the global entry array chain. ## Extensibility @@ -397,7 +402,16 @@ _packed_ struct DataObject { le64_t entry_offset; /* the first array entry we store inline */ le64_t entry_array_offset; le64_t n_entries; - uint8_t payload[]; + union { + struct { + uint8_t payload[]; + } regular; + struct { + le32_t tail_entry_array_offset; + le32_t tail_entry_array_n_entries; + uint8_t payload[]; + } compact; + }; }; ``` @@ -430,6 +444,9 @@ OBJECT_COMPRESSED_XZ/OBJECT_COMPRESSED_LZ4/OBJECT_COMPRESSED_ZSTD is set in the `ObjectHeader`, in which case the payload is compressed with the indicated compression algorithm. +If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, two extra fields are stored to +allow immediate access to the tail entry array in the DATA object's entry array +chain. 
## Field Objects diff --git a/src/libsystemd/sd-journal/journal-authenticate.c b/src/libsystemd/sd-journal/journal-authenticate.c index 3965f3f589..1cb8943389 100644 --- a/src/libsystemd/sd-journal/journal-authenticate.c +++ b/src/libsystemd/sd-journal/journal-authenticate.c @@ -248,7 +248,7 @@ int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uin case OBJECT_DATA: /* All but hash and payload are mutable */ gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash)); - gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); + gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f)); break; case OBJECT_FIELD: diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h index f04a2298c4..8f994b0178 100644 --- a/src/libsystemd/sd-journal/journal-def.h +++ b/src/libsystemd/sd-journal/journal-def.h @@ -65,8 +65,17 @@ struct ObjectHeader { le64_t entry_offset; /* the first array entry we store inline */ \ le64_t entry_array_offset; \ le64_t n_entries; \ - uint8_t payload[]; \ - } + union { \ + struct { \ + uint8_t payload[0]; \ + } regular; \ + struct { \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ + uint8_t payload[0]; \ + } compact; \ + }; \ +} struct DataObject DataObject__contents; struct DataObject__packed DataObject__contents _packed_; @@ -222,12 +231,15 @@ enum { /* Added in 246 */ \ le64_t data_hash_chain_depth; \ le64_t field_hash_chain_depth; \ + /* Added in 252 */ \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ } struct Header struct_Header__contents; struct Header__packed struct_Header__contents _packed_; assert_cc(sizeof(struct Header) == sizeof(struct Header__packed)); -assert_cc(sizeof(struct Header) == 256); +assert_cc(sizeof(struct Header) == 264); #define FSS_HEADER_SIGNATURE \ ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 
'L', 'P' }) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 67bd2305ad..7dbbd4889c 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -662,7 +662,7 @@ static int journal_file_move_to( return mmap_cache_fd_get(f->cache_fd, type_to_context(type), keep_always, offset, size, &f->last_stat, ret); } -static uint64_t minimum_header_size(Object *o) { +static uint64_t minimum_header_size(JournalFile *f, Object *o) { static const uint64_t table[] = { [OBJECT_DATA] = sizeof(DataObject), @@ -674,15 +674,22 @@ static uint64_t minimum_header_size(Object *o) { [OBJECT_TAG] = sizeof(TagObject), }; + assert(f); + assert(o); + + if (o->object.type == OBJECT_DATA) + return journal_file_data_payload_offset(f); + if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0) return sizeof(ObjectHeader); return table[o->object.type]; } -static int check_object_header(Object *o, ObjectType type, uint64_t offset) { +static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) { uint64_t s; + assert(f); assert(o); s = le64toh(READ_NOW(o->object.size)); @@ -706,7 +713,7 @@ static int check_object_header(Object *o, ObjectType type, uint64_t offset) { "Attempt to move to object of unexpected type: %" PRIu64, offset); - if (s < minimum_header_size(o)) + if (s < minimum_header_size(f, o)) return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Attempt to move to truncated object: %" PRIu64, offset); @@ -728,10 +735,10 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) { le64toh(o->data.n_entries), offset); - if (le64toh(o->object.size) <= offsetof(Object, data.payload)) + if (le64toh(o->object.size) <= journal_file_data_payload_offset(f)) return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64, - offsetof(Object, data.payload), + journal_file_data_payload_offset(f), le64toh(o->object.size), 
offset); @@ -883,7 +890,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset if (r < 0) return r; - r = check_object_header(o, type, offset); + r = check_object_header(f, o, type, offset); if (r < 0) return r; @@ -891,7 +898,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset if (r < 0) return r; - r = check_object_header(o, type, offset); + r = check_object_header(f, o, type, offset); if (r < 0) return r; @@ -935,11 +942,11 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of "Failed to read short object at offset: %" PRIu64, offset); - r = check_object_header(&o, type, offset); + r = check_object_header(f, &o, type, offset); if (r < 0) return r; - if ((size_t) n < minimum_header_size(&o)) + if ((size_t) n < minimum_header_size(f, &o)) return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading object: %" PRIu64, offset); @@ -1541,15 +1548,35 @@ static int journal_file_append_field( return 0; } +static Compression maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) { + Compression compression = COMPRESSION_NONE; + +#if HAVE_COMPRESSION + if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) { + compression = compress_blob(src, size, dst, size - 1, rsize); + if (compression > 0) { + log_debug("Compressed data object %"PRIu64" -> %zu using %s", + size, *rsize, compression_to_string(compression)); + } else + /* Compression didn't work, we don't really care why, let's continue without compression */ + compression = COMPRESSION_NONE; + } +#endif + + return compression; +} + static int journal_file_append_data( JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *ret_offset) { - uint64_t hash, p, fp, osize; + uint64_t hash, p, osize; Object *o, *fo; - int r, compression = 0; + size_t rsize = 0; + Compression c; const void *eq; + int r; assert(f); @@ -1568,32 +1595,20 @@ static 
int journal_file_append_data( if (!eq) return -EINVAL; - osize = offsetof(Object, data.payload) + size; + osize = journal_file_data_payload_offset(f) + size; r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p); if (r < 0) return r; o->data.hash = htole64(hash); -#if HAVE_COMPRESSION - if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) { - size_t rsize = 0; + c = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize); - compression = compress_blob(data, size, o->data.payload, size - 1, &rsize); - if (compression > COMPRESSION_NONE) { - o->object.size = htole64(offsetof(Object, data.payload) + rsize); - o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(compression); - - log_debug("Compressed data object %"PRIu64" -> %zu using %s", - size, rsize, compression_to_string(compression)); - } else - /* Compression didn't work, we don't really care why, let's continue without compression */ - compression = COMPRESSION_NONE; - } -#endif - - if (compression == 0) - memcpy_safe(o->data.payload, data, size); + if (c != COMPRESSION_NONE) { + o->object.size = htole64(journal_file_data_payload_offset(f) + rsize); + o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c); + } else + memcpy_safe(journal_file_data_payload_field(f, o), data, size); r = journal_file_link_data(f, o, p, hash); if (r < 0) @@ -1611,7 +1626,7 @@ static int journal_file_append_data( #endif /* Create field object ... 
*/ - r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp); + r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL); if (r < 0) return r; @@ -1715,17 +1730,17 @@ int journal_file_data_payload( } size = le64toh(READ_NOW(o->object.size)); - if (size < offsetof(Object, data.payload)) + if (size < journal_file_data_payload_offset(f)) return -EBADMSG; - size -= offsetof(Object, data.payload); + size -= journal_file_data_payload_offset(f); c = COMPRESSION_FROM_OBJECT(o); if (c < 0) return -EPROTONOSUPPORT; - return maybe_decompress_payload(f, o->data.payload, size, c, field, field_length, data_threshold, - ret_data, ret_size); + return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field, + field_length, data_threshold, ret_data, ret_size); } uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) { @@ -1788,6 +1803,8 @@ static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64 static int link_entry_into_array(JournalFile *f, le64_t *first, le64_t *idx, + le32_t *tail, + le32_t *tidx, uint64_t p) { int r; uint64_t n = 0, ap = 0, q, i, a, hidx; @@ -1799,8 +1816,9 @@ static int link_entry_into_array(JournalFile *f, assert(idx); assert(p > 0); - a = le64toh(*first); - i = hidx = le64toh(READ_NOW(*idx)); + a = tail ? le32toh(*tail) : le64toh(*first); + hidx = le64toh(READ_NOW(*idx)); + i = tidx ? 
le32toh(READ_NOW(*tidx)) : hidx; while (a > 0) { r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); @@ -1811,6 +1829,8 @@ static int link_entry_into_array(JournalFile *f, if (i < n) { write_entry_array_item(f, o, i, p); *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(le32toh(*tidx) + 1); return 0; } @@ -1851,10 +1871,15 @@ static int link_entry_into_array(JournalFile *f, o->entry_array.next_entry_array_offset = htole64(q); } + if (tail) + *tail = htole32(q); + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays)) f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1); *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(1); return 0; } @@ -1863,6 +1888,8 @@ static int link_entry_into_array_plus_one(JournalFile *f, le64_t *extra, le64_t *first, le64_t *idx, + le32_t *tail, + le32_t *tidx, uint64_t p) { uint64_t hidx; @@ -1883,7 +1910,7 @@ static int link_entry_into_array_plus_one(JournalFile *f, le64_t i; i = htole64(hidx - 1); - r = link_entry_into_array(f, first, &i, p); + r = link_entry_into_array(f, first, &i, tail, tidx, p); if (r < 0) return r; } @@ -1907,6 +1934,8 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs &o->data.entry_offset, &o->data.entry_array_offset, &o->data.n_entries, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL, offset); } @@ -1933,6 +1962,8 @@ static int journal_file_link_entry( r = link_entry_into_array(f, &f->header->entry_array_offset, &f->header->n_entries, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? 
&f->header->tail_entry_array_n_entries : NULL, offset); if (r < 0) return r; diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index 7976953793..e5b9765471 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -223,6 +223,16 @@ int journal_file_data_payload( void **ret_data, size_t *ret_size); +static inline size_t journal_file_data_payload_offset(JournalFile *f) { + return JOURNAL_HEADER_COMPACT(f->header) + ? offsetof(Object, data.compact.payload) + : offsetof(Object, data.regular.payload); +} + +static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) { + return JOURNAL_HEADER_COMPACT(f->header) ? o->data.compact.payload : o->data.regular.payload; +} + uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_; static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) { diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c index 37d2a656b2..8b2c468a0b 100644 --- a/src/libsystemd/sd-journal/journal-verify.c +++ b/src/libsystemd/sd-journal/journal-verify.c @@ -170,16 +170,16 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o return -EBADMSG; } - if (le64toh(o->object.size) - offsetof(Object, data.payload) <= 0) { + if (le64toh(o->object.size) <= journal_file_data_payload_offset(f)) { /* compare directly; an unsigned difference can never be < 0 and would wrap for undersized objects */ error(offset, "Bad object size (<= %zu): %"PRIu64, - offsetof(Object, data.payload), + journal_file_data_payload_offset(f), le64toh(o->object.size)); return -EBADMSG; } h1 = le64toh(o->data.hash); - r = hash_payload(f, o, offset, o->data.payload, - le64toh(o->object.size) - offsetof(Object, data.payload), + r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o), + le64toh(o->object.size) - journal_file_data_payload_offset(f), &h2); if (r < 0) return r;