From f8ff195ef1b9b84111f1872d70919fbfb38f2a5f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Aug 2020 23:39:16 +0206 Subject: docs: vmcoreinfo: add lockless printk ringbuffer vmcoreinfo With the introduction of the lockless printk ringbuffer, the VMCOREINFO relating to the kernel log buffer was changed. Update the documentation to match those changes. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Reported-by: Nick Desaulniers Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200814213316.6394-1-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 131 +++++++++++++++++++------ 1 file changed, 102 insertions(+), 29 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index e4ee8b2db604..703bb9f92a39 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -184,50 +184,123 @@ from this. Free areas descriptor. User-space tools use this value to iterate the free_area ranges. MAX_ORDER is used by the zone buddy allocator. -log_first_idx +prb +--- + +A pointer to the printk ringbuffer (struct printk_ringbuffer). This +may be pointing to the static boot ringbuffer or the dynamically +allocated ringbuffer, depending on when the the core dump occurred. +Used by user-space tools to read the active kernel log buffer. + +printk_rb_static +---------------- + +A pointer to the static boot printk ringbuffer. If @prb has a +different value, this is useful for viewing the initial boot messages, +which may have been overwritten in the dynamically allocated +ringbuffer. + +clear_seq +--------- + +The sequence number of the printk() record after the last clear +command. It indicates the first record after the last +SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space +tools to dump a subset of the dmesg log. + +printk_ringbuffer +----------------- + +The size of a printk_ringbuffer structure. This structure contains all +information required for accessing the various components of the +kernel log buffer. + +(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail) +----------------------------------------------------------------- + +Offsets for the various components of the printk ringbuffer. Used by +user-space tools to view the kernel log buffer without requiring the +declaration of the structure. + +prb_desc_ring ------------- -Index of the first record stored in the buffer log_buf. Used by -user-space tools to read the strings in the log_buf. +The size of the prb_desc_ring structure. This structure contains +information about the set of record descriptors. -log_buf -------- +(prb_desc_ring, count_bits|descs|head_id|tail_id) +------------------------------------------------- + +Offsets for the fields describing the set of record descriptors. Used +by user-space tools to be able to traverse the descriptors without +requiring the declaration of the structure. + +prb_desc +-------- + +The size of the prb_desc structure. This structure contains +information about a single record descriptor. + +(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos) +------------------------------------------------------ + +Offsets for the fields describing a record descriptors. Used by +user-space tools to be able to read descriptors without requiring +the declaration of the structure. + +prb_data_blk_lpos +----------------- + +The size of the prb_data_blk_lpos structure. This structure contains +information about where the text or dictionary data (data block) is +located within the respective data ring. + +(prb_data_blk_lpos, begin|next) +------------------------------- -Console output is written to the ring buffer log_buf at index -log_first_idx. Used to get the kernel log. +Offsets for the fields describing the location of a data block. Used +by user-space tools to be able to locate data blocks without +requiring the declaration of the structure. -log_buf_len +printk_info ----------- -log_buf's length. +The size of the printk_info structure. This structure contains all +the meta-data for a record. -clear_idx ---------- +(printk_info, seq|ts_nsec|text_len|dict_len|caller_id) +------------------------------------------------------ -The index that the next printk() record to read after the last clear -command. It indicates the first record after the last SYSLOG_ACTION -_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump -the dmesg log. +Offsets for the fields providing the meta-data for a record. Used by +user-space tools to be able to read the information without requiring +the declaration of the structure. -log_next_idx ------------- +prb_data_ring +------------- -The index of the next record to store in the buffer log_buf. Used to -compute the index of the current buffer position. +The size of the prb_data_ring structure. This structure contains +information about a set of data blocks. -printk_log ----------- +(prb_data_ring, size_bits|data|head_lpos|tail_lpos) +--------------------------------------------------- -The size of a structure printk_log. Used to compute the size of -messages, and extract dmesg log. It encapsulates header information for -log_buf, such as timestamp, syslog level, etc. +Offsets for the fields describing a set of data blocks. Used by +user-space tools to be able to access the data blocks without +requiring the declaration of the structure. -(printk_log, ts_nsec|len|text_len|dict_len) -------------------------------------------- +atomic_long_t +------------- + +The size of the atomic_long_t structure. Used by user-space tools to +be able to copy the full structure, regardless of its +architecture-specific implementation. + +(atomic_long_t, counter) +------------------------ -It represents field offsets in struct printk_log. User space tools -parse it and check whether the values of printk_log's members have been -changed. +Offset for the long value of an atomic_long_t variable. Used by +user-space tools to access the long value without requiring the +architecture-specific declaration. (free_area.free_list, MIGRATE_TYPES) ------------------------------------ -- cgit v1.2.3 From e60768311af854734ce2bbfc50f24cff67b54a91 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Aug 2020 23:31:25 +0206 Subject: scripts/gdb: update for lockless printk ringbuffer With the introduction of the lockless printk ringbuffer, the data structure for the kernel log buffer was changed. Update the gdb scripts to be able to parse/print the new log buffer structure. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Signed-off-by: John Ogness Reported-by: Nick Desaulniers Tested-by: Nick Desaulniers Tested-by: Petr Mladek [akpm@linux-foundation.org: A typo fix.] Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200814212525.6118-3-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 153 ++++++++++++++++++-------- scripts/gdb/linux/dmesg.py | 139 ++++++++++++++++------- 2 files changed, 209 insertions(+), 83 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 220d0a80ca2c..7adece30237e 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -170,57 +170,111 @@ document trapinfo address the kernel panicked. end -define dump_log_idx - set $idx = $arg0 +define dump_record + set var $desc = $arg0 if ($argc > 1) - set $prev_flags = $arg1 + set var $prev_flags = $arg1 else - set $prev_flags = 0 + set var $prev_flags = 0 end - set $msg = ((struct printk_log *) (log_buf + $idx)) - set $prefix = 1 - set $newline = 1 - set $log = log_buf + $idx + sizeof(*$msg) - - # prev & LOG_CONT && !(msg->flags & LOG_PREIX) - if (($prev_flags & 8) && !($msg->flags & 4)) - set $prefix = 0 + + set var $info = &$desc->info + set var $prefix = 1 + set var $newline = 1 + + set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits) + set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits) + + # handle data-less record + if ($begin & 1) + set var $text_len = 0 + set var $log = "" + else + # handle wrapping data block + if ($begin > $next) + set var $begin = 0 + end + + # skip over descriptor id + set var $begin = $begin + sizeof(long) + + # handle truncated message + if ($next - $begin < $info->text_len) + set var $text_len = $next - $begin + else + set var $text_len = $info->text_len + end + + set var $log = &prb->text_data_ring.data[$begin] + end + + # prev & LOG_CONT && !(info->flags & LOG_PREIX) + if (($prev_flags & 8) && !($info->flags & 4)) + set var $prefix = 0 end - # msg->flags & LOG_CONT - if ($msg->flags & 8) + # info->flags & LOG_CONT + if ($info->flags & 8) # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) if (($prev_flags & 8) && !($prev_flags & 2)) - set $prefix = 0 + set var $prefix = 0 end - # (!(msg->flags & LOG_NEWLINE)) - if (!($msg->flags & 2)) - set $newline = 0 + # (!(info->flags & LOG_NEWLINE)) + if (!($info->flags & 2)) + set var $newline = 0 end end if ($prefix) - printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 + printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000 end - if ($msg->text_len != 0) - eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len + if ($text_len) + eval "printf \"%%%d.%ds\", $log", $text_len, $text_len end if ($newline) printf "\n" end - if ($msg->dict_len > 0) - set $dict = $log + $msg->text_len - set $idx = 0 - set $line = 1 - while ($idx < $msg->dict_len) + + # handle dictionary data + + set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) + set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) + + # handle data-less record + if ($begin & 1) + set var $dict_len = 0 + set var $dict = "" + else + # handle wrapping data block + if ($begin > $next) + set var $begin = 0 + end + + # skip over descriptor id + set var $begin = $begin + sizeof(long) + + # handle truncated message + if ($next - $begin < $info->dict_len) + set var $dict_len = $next - $begin + else + set var $dict_len = $info->dict_len + end + + set var $dict = &prb->dict_data_ring.data[$begin] + end + + if ($dict_len > 0) + set var $idx = 0 + set var $line = 1 + while ($idx < $dict_len) if ($line) printf " " - set $line = 0 + set var $line = 0 end - set $c = $dict[$idx] + set var $c = $dict[$idx] if ($c == '\0') printf "\n" - set $line = 1 + set var $line = 1 else if ($c < ' ' || $c >= 127 || $c == '\\') printf "\\x%02x", $c @@ -228,33 +282,40 @@ define dump_log_idx printf "%c", $c end end - set $idx = $idx + 1 + set var $idx = $idx + 1 end printf "\n" end end -document dump_log_idx - Dump a single log given its index in the log buffer. The first - parameter is the index into log_buf, the second is optional and - specified the previous log buffer's flags, used for properly - formatting continued lines. +document dump_record + Dump a single record. The first parameter is the descriptor + sequence number, the second is optional and specifies the + previous record's flags, used for properly formatting + continued lines. end define dmesg - set $i = log_first_idx - set $end_idx = log_first_idx - set $prev_flags = 0 + set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) + set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) + set var $id_mask = ~$flags_mask + + set var $desc_count = 1U << prb->desc_ring.count_bits + set var $prev_flags = 0 + + set var $id = prb->desc_ring.tail_id.counter + set var $end_id = prb->desc_ring.head_id.counter while (1) - set $msg = ((struct printk_log *) (log_buf + $i)) - if ($msg->len == 0) - set $i = 0 - else - dump_log_idx $i $prev_flags - set $i = $i + $msg->len - set $prev_flags = $msg->flags + set var $desc = &prb->desc_ring.descs[$id % $desc_count] + + # skip non-committed record + if (($desc->state_var.counter & $flags_mask) == $desc_committed) + dump_record $desc $prev_flags + set var $prev_flags = $desc->info.flags end - if ($i == $end_idx) + + set var $id = ($id + 1) & $id_mask + if ($id == $end_id) loop_break end end diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index 2fa7bb83885f..6c6022012ea8 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -16,8 +16,13 @@ import sys from linux import utils -printk_log_type = utils.CachedType("struct printk_log") - +printk_info_type = utils.CachedType("struct printk_info") +prb_data_blk_lpos_type = utils.CachedType("struct prb_data_blk_lpos") +prb_desc_type = utils.CachedType("struct prb_desc") +prb_desc_ring_type = utils.CachedType("struct prb_desc_ring") +prb_data_ring_type = utils.CachedType("struct prb_data_ring") +printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer") +atomic_long_type = utils.CachedType("atomic_long_t") class LxDmesg(gdb.Command): """Print Linux kernel log buffer.""" @@ -26,44 +31,102 @@ class LxDmesg(gdb.Command): super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): - log_buf_addr = int(str(gdb.parse_and_eval( - "(void *)'printk.c'::log_buf")).split()[0], 16) - log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) - log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) - log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) - inf = gdb.inferiors()[0] - start = log_buf_addr + log_first_idx - if log_first_idx < log_next_idx: - log_buf_2nd_half = -1 - length = log_next_idx - log_first_idx - log_buf = utils.read_memoryview(inf, start, length).tobytes() - else: - log_buf_2nd_half = log_buf_len - log_first_idx - a = utils.read_memoryview(inf, start, log_buf_2nd_half) - b = utils.read_memoryview(inf, log_buf_addr, log_next_idx) - log_buf = a.tobytes() + b.tobytes() - - length_offset = printk_log_type.get_type()['len'].bitpos // 8 - text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 - time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 - text_offset = printk_log_type.get_type().sizeof - - pos = 0 - while pos < log_buf.__len__(): - length = utils.read_u16(log_buf, pos + length_offset) - if length == 0: - if log_buf_2nd_half == -1: - gdb.write("Corrupted log buffer!\n") + + # read in prb structure + prb_addr = int(str(gdb.parse_and_eval("(void *)'printk.c'::prb")).split()[0], 16) + sz = printk_ringbuffer_type.get_type().sizeof + prb = utils.read_memoryview(inf, prb_addr, sz).tobytes() + + # read in descriptor ring structure + off = printk_ringbuffer_type.get_type()['desc_ring'].bitpos // 8 + addr = prb_addr + off + sz = prb_desc_ring_type.get_type().sizeof + desc_ring = utils.read_memoryview(inf, addr, sz).tobytes() + + # read in descriptor array + off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8 + desc_ring_count = 1 << utils.read_u32(desc_ring, off) + desc_sz = prb_desc_type.get_type().sizeof + off = prb_desc_ring_type.get_type()['descs'].bitpos // 8 + addr = utils.read_ulong(desc_ring, off) + descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() + + # read in text data ring structure + off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 + addr = prb_addr + off + sz = prb_data_ring_type.get_type().sizeof + text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes() + + # read in text data + off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8 + text_data_sz = 1 << utils.read_u32(text_data_ring, off) + off = prb_data_ring_type.get_type()['data'].bitpos // 8 + addr = utils.read_ulong(text_data_ring, off) + text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes() + + counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 + + sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8 + + off = prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8 + begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) + next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) + + off = prb_desc_type.get_type()['info'].bitpos // 8 + ts_off = off + printk_info_type.get_type()['ts_nsec'].bitpos // 8 + len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 + + # definitions from kernel/printk/printk_ringbuffer.h + desc_sv_bits = utils.get_long_type().sizeof * 8 + desc_committed_mask = 1 << (desc_sv_bits - 1) + desc_reuse_mask = 1 << (desc_sv_bits - 2) + desc_flags_mask = desc_committed_mask | desc_reuse_mask + desc_id_mask = ~desc_flags_mask + + # read in tail and head descriptor ids + off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8 + tail_id = utils.read_u64(desc_ring, off + counter_off) + off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8 + head_id = utils.read_u64(desc_ring, off + counter_off) + + did = tail_id + while True: + ind = did % desc_ring_count + desc_off = desc_sz * ind + + # skip non-committed record + state = utils.read_u64(descs, desc_off + sv_off + counter_off) & desc_flags_mask + if state != desc_committed_mask: + if did == head_id: break - pos = log_buf_2nd_half + did = (did + 1) & desc_id_mask continue - text_len = utils.read_u16(log_buf, pos + text_len_offset) - text_start = pos + text_offset - text = log_buf[text_start:text_start + text_len].decode( - encoding='utf8', errors='replace') - time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) + begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz + end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz + + # handle data-less record + if begin & 1 == 1: + text = "" + else: + # handle wrapping data block + if begin > end: + begin = 0 + + # skip over descriptor id + text_start = begin + utils.get_long_type().sizeof + + text_len = utils.read_u16(descs, desc_off + len_off) + + # handle truncated message + if end - text_start < text_len: + text_len = end - text_start + + text = text_data[text_start:text_start + text_len].decode( + encoding='utf8', errors='replace') + + time_stamp = utils.read_u64(descs, desc_off + ts_off) for line in text.splitlines(): msg = u"[{time:12.6f}] {line}\n".format( @@ -75,7 +138,9 @@ class LxDmesg(gdb.Command): msg = msg.encode(encoding='utf8', errors='replace') gdb.write(msg) - pos += length + if did == head_id: + break + did = (did + 1) & desc_id_mask LxDmesg() -- cgit v1.2.3 From 10dcb06d40411a73e1ae111717e9a987bb760313 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 14 Sep 2020 14:39:52 +0206 Subject: printk: ringbuffer: change representation of states Rather than deriving the state by evaluating bits within the flags area of the state variable, assign the states explicit values and set those values in the flags area. Introduce macros to make it simple to read and write state values for the state variable. Although the functionality is preserved, the binary representation for the states is changed. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200914123354.832-5-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 12 +++++++---- kernel/printk/printk_ringbuffer.c | 28 ++++++------------------ kernel/printk/printk_ringbuffer.h | 31 +++++++++++++++++---------- scripts/gdb/linux/dmesg.py | 11 +++++----- 4 files changed, 41 insertions(+), 41 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 7adece30237e..8f533b751c46 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -295,9 +295,12 @@ document dump_record end define dmesg - set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) - set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) - set var $id_mask = ~$flags_mask + # definitions from kernel/printk/printk_ringbuffer.h + set var $desc_committed = 1 + set var $desc_sv_bits = sizeof(long) * 8 + set var $desc_flags_shift = $desc_sv_bits - 2 + set var $desc_flags_mask = 3 << $desc_flags_shift + set var $id_mask = ~$desc_flags_mask set var $desc_count = 1U << prb->desc_ring.count_bits set var $prev_flags = 0 @@ -309,7 +312,8 @@ define dmesg set var $desc = &prb->desc_ring.descs[$id % $desc_count] # skip non-committed record - if (($desc->state_var.counter & $flags_mask) == $desc_committed) + set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) + if ($state == $desc_committed) dump_record $desc $prev_flags set var $prev_flags = $desc->info.flags end diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 5a9c7c8cff7b..c0d31185ccbf 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -348,14 +348,6 @@ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) return true; } -/* The possible responses of a descriptor state-query. */ -enum desc_state { - desc_miss, /* ID mismatch */ - desc_reserved, /* reserved, in use by writer */ - desc_committed, /* committed, writer is done */ - desc_reusable, /* free, not yet used by any writer */ -}; - /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) @@ -363,13 +355,7 @@ static enum desc_state get_desc_state(unsigned long id, if (id != DESC_ID(state_val)) return desc_miss; - if (state_val & DESC_REUSE_MASK) - return desc_reusable; - - if (state_val & DESC_COMMITTED_MASK) - return desc_committed; - - return desc_reserved; + return DESC_STATE(state_val); } /* @@ -484,8 +470,8 @@ out: static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { - unsigned long val_committed = id | DESC_COMMITTED_MASK; - unsigned long val_reusable = val_committed | DESC_REUSE_MASK; + unsigned long val_committed = DESC_SV(id, desc_committed); + unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; @@ -921,7 +907,7 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) */ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ if (prev_state_val && - prev_state_val != (id_prev_wrap | DESC_COMMITTED_MASK | DESC_REUSE_MASK)) { + get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { WARN_ON_ONCE(1); return false; } @@ -935,7 +921,7 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) * This pairs with desc_read:D. */ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, - id | 0)) { /* LMM(desc_reserve:F) */ + DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ WARN_ON_ONCE(1); return false; } @@ -1254,7 +1240,7 @@ void prb_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); - unsigned long prev_state_val = e->id | 0; + unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); /* Now the writer has finished all writing: LMM(prb_commit:A) */ @@ -1267,7 +1253,7 @@ void prb_commit(struct prb_reserved_entry *e) * this. This pairs with desc_read:B. */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, - e->id | DESC_COMMITTED_MASK)) { /* LMM(prb_commit:B) */ + DESC_SV(e->id, desc_committed))) { /* LMM(prb_commit:B) */ WARN_ON_ONCE(1); } diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index e6302da041f9..a9d85a6727b1 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -112,16 +112,25 @@ struct prb_reserved_entry { unsigned int text_space; }; -#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) -#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -#define DESC_SV_BITS (sizeof(unsigned long) * 8) -#define DESC_COMMITTED_MASK (1UL << (DESC_SV_BITS - 1)) -#define DESC_REUSE_MASK (1UL << (DESC_SV_BITS - 2)) -#define DESC_FLAGS_MASK (DESC_COMMITTED_MASK | DESC_REUSE_MASK) -#define DESC_ID_MASK (~DESC_FLAGS_MASK) -#define DESC_ID(sv) ((sv) & DESC_ID_MASK) -#define FAILED_LPOS 0x1 -#define NO_LPOS 0x3 +/* The possible responses of a descriptor state-query. */ +enum desc_state { + desc_miss = -1, /* ID mismatch (pseudo state) */ + desc_reserved = 0x0, /* reserved, in use by writer */ + desc_committed = 0x1, /* committed by writer */ + desc_reusable = 0x3, /* free, not yet used by any writer */ +}; + +#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) +#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) +#define DESC_SV_BITS (sizeof(unsigned long) * 8) +#define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) +#define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) +#define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) +#define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) +#define DESC_ID_MASK (~DESC_FLAGS_MASK) +#define DESC_ID(sv) ((sv) & DESC_ID_MASK) +#define FAILED_LPOS 0x1 +#define NO_LPOS 0x3 #define FAILED_BLK_LPOS \ { \ @@ -213,7 +222,7 @@ struct prb_reserved_entry { */ #define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) #define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) -#define DESC0_SV(ct_bits) (DESC_COMMITTED_MASK | DESC_REUSE_MASK | DESC0_ID(ct_bits)) +#define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) /* * Define a ringbuffer with an external text data buffer. The same as diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index 6c6022012ea8..dd8c0b95063a 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -78,10 +78,10 @@ class LxDmesg(gdb.Command): len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 # definitions from kernel/printk/printk_ringbuffer.h + desc_committed = 1 desc_sv_bits = utils.get_long_type().sizeof * 8 - desc_committed_mask = 1 << (desc_sv_bits - 1) - desc_reuse_mask = 1 << (desc_sv_bits - 2) - desc_flags_mask = desc_committed_mask | desc_reuse_mask + desc_flags_shift = desc_sv_bits - 2 + desc_flags_mask = 3 << desc_flags_shift desc_id_mask = ~desc_flags_mask # read in tail and head descriptor ids @@ -96,8 +96,9 @@ class LxDmesg(gdb.Command): desc_off = desc_sz * ind # skip non-committed record - state = utils.read_u64(descs, desc_off + sv_off + counter_off) & desc_flags_mask - if state != desc_committed_mask: + state = 3 & (utils.read_u64(descs, desc_off + sv_off + + counter_off) >> desc_flags_shift) + if state != desc_committed: if did == head_id: break did = (did + 1) & desc_id_mask -- cgit v1.2.3 From 4cfc7258f876a7feba673ac6d050f525b39cc84c Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 14 Sep 2020 14:39:53 +0206 Subject: printk: ringbuffer: add finalization/extension support Add support for extending the newest data block. For this, introduce a new finalization state (desc_finalized) denoting a committed descriptor that cannot be extended. Until a record is finalized, a writer can reopen that record to append new data. Reopening a record means transitioning from the desc_committed state back to the desc_reserved state. A writer can explicitly finalize a record if there is no intention of extending it. Also, records are automatically finalized when a new record is reserved. This relieves writers of needing to explicitly finalize while also making such records available to readers sooner. (Readers can only traverse finalized records.) Four new memory barrier pairs are introduced. Two of them are insignificant additions (data_realloc:A/desc_read:D and data_realloc:A/data_push_tail:B) because they are alternate path memory barriers that exactly match the purpose, pairing, and context of the two existing memory barrier pairs they provide an alternate path for. The other two new memory barrier pairs are significant additions: desc_reopen_last:A / _prb_commit:B - When reopening a descriptor, ensure the state transitions back to desc_reserved before fully trusting the descriptor data. _prb_commit:B / desc_reserve:D - When committing a descriptor, ensure the state transitions to desc_committed before checking the head ID to see if the descriptor needs to be finalized. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200914123354.832-6-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 3 +- kernel/printk/printk_ringbuffer.c | 525 +++++++++++++++++++++++--- kernel/printk/printk_ringbuffer.h | 6 +- scripts/gdb/linux/dmesg.py | 3 +- 4 files changed, 480 insertions(+), 57 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 8f533b751c46..94fabb165abf 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -297,6 +297,7 @@ end define dmesg # definitions from kernel/printk/printk_ringbuffer.h set var $desc_committed = 1 + set var $desc_finalized = 2 set var $desc_sv_bits = sizeof(long) * 8 set var $desc_flags_shift = $desc_sv_bits - 2 set var $desc_flags_mask = 3 << $desc_flags_shift @@ -313,7 +314,7 @@ define dmesg # skip non-committed record set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) - if ($state == $desc_committed) + if ($state == $desc_committed || $state == $desc_finalized) dump_record $desc $prev_flags set var $prev_flags = $desc->info.flags end diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index c0d31185ccbf..f4e2e9890e0f 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -46,20 +46,26 @@ * into a single descriptor field named @state_var, allowing ID and state to * be synchronously and atomically updated. * - * Descriptors have three states: + * Descriptors have four states: * * reserved * A writer is modifying the record. * * committed - * The record and all its data are complete and available for reading. + * The record and all its data are written. A writer can reopen the + * descriptor (transitioning it back to reserved), but in the committed + * state the data is consistent. + * + * finalized + * The record and all its data are complete and available for reading. A + * writer cannot reopen the descriptor. * * reusable * The record exists, but its text and/or dictionary data may no longer * be available. * * Querying the @state_var of a record requires providing the ID of the - * descriptor to query. This can yield a possible fourth (pseudo) state: + * descriptor to query. This can yield a possible fifth (pseudo) state: * * miss * The descriptor being queried has an unexpected ID. @@ -79,6 +85,28 @@ * committed or reusable queried state. This makes it possible that a valid * sequence number of the tail is always available. * + * Descriptor Finalization + * ~~~~~~~~~~~~~~~~~~~~~~~ + * When a writer calls the commit function prb_commit(), record data is + * fully stored and is consistent within the ringbuffer. However, a writer can + * reopen that record, claiming exclusive access (as with prb_reserve()), and + * modify that record. When finished, the writer must again commit the record. + * + * In order for a record to be made available to readers (and also become + * recyclable for writers), it must be finalized. A finalized record cannot be + * reopened and can never become "unfinalized". Record finalization can occur + * in three different scenarios: + * + * 1) A writer can simultaneously commit and finalize its record by calling + * prb_final_commit() instead of prb_commit(). + * + * 2) When a new record is reserved and the previous record has been + * committed via prb_commit(), that previous record is automatically + * finalized. + * + * 3) When a record is committed via prb_commit() and a newer record + * already exists, the record being committed is automatically finalized. + * * Data Rings * ~~~~~~~~~~ * The two data rings (text and dictionary) function identically. They exist @@ -97,7 +125,7 @@ * are met: * * 1) The descriptor associated with the data block is in the committed - * queried state. + * or finalized queried state. * * 2) The blk_lpos struct within the descriptor associated with the data * block references back to the same data block. @@ -156,9 +184,38 @@ * * r.info->ts_nsec = local_clock(); * + * prb_final_commit(&e); + * } + * + * Note that additional writer functions are available to extend a record + * after it has been committed but not yet finalized. This can be done as + * long as no new records have been reserved and the caller is the same. + * + * Sample writer code (record extending):: + * + * // alternate rest of previous example + * r.info->ts_nsec = local_clock(); + * r.info->text_len = strlen(textstr); + * r.info->caller_id = printk_caller_id(); + * + * // commit the record (but do not finalize yet) * prb_commit(&e); * } * + * ... + * + * // specify additional 5 bytes text space to extend + * prb_rec_init_wr(&r, 5, 0); + * + * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id())) { + * snprintf(&r.text_buf[r.info->text_len], + * r.text_buf_size - r.info->text_len, "hello"); + * + * r.info->text_len += 5; + * + * prb_final_commit(&e); + * } + * * Sample reader code:: * * struct printk_info info; @@ -236,15 +293,21 @@ * desc_reserve:F / desc_read:D * set new descriptor id and reserved (state), then allow writer changes * - * data_alloc:A / desc_read:D + * data_alloc:A (or data_realloc:A) / desc_read:D * set old descriptor reusable (state), then modify new data block area * - * data_alloc:A / data_push_tail:B + * data_alloc:A (or data_realloc:A) / data_push_tail:B * push data tail (lpos), then modify new data block area * - * prb_commit:B / desc_read:B + * _prb_commit:B / desc_read:B * store writer changes, then set new descriptor committed (state) * + * desc_reopen_last:A / _prb_commit:B + * set descriptor reserved (state), then read descriptor data + * + * _prb_commit:B / desc_reserve:D + * set new descriptor committed (state), then check descriptor head (id) + * * data_push_tail:D / data_push_tail:A * set descriptor reusable (state), then push data tail (lpos) * @@ -386,16 +449,16 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, /* * Guarantee the state is loaded before copying the descriptor * content. This avoids copying obsolete descriptor content that might - * not apply to the descriptor state. This pairs with prb_commit:B. + * not apply to the descriptor state. This pairs with _prb_commit:B. * * Memory barrier involvement: * - * If desc_read:A reads from prb_commit:B, then desc_read:C reads - * from prb_commit:A. + * If desc_read:A reads from _prb_commit:B, then desc_read:C reads + * from _prb_commit:A. * * Relies on: * - * WMB from prb_commit:A to prb_commit:B + * WMB from _prb_commit:A to _prb_commit:B * matching * RMB from desc_read:A to desc_read:C */ @@ -431,7 +494,8 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, * * 2. Guarantee the record data is loaded before re-checking the * state. This avoids reading an obsolete descriptor state that may - * not apply to the copied data. This pairs with data_alloc:A. + * not apply to the copied data. This pairs with data_alloc:A and + * data_realloc:A. * * Memory barrier involvement: * @@ -463,19 +527,19 @@ out: } /* - * Take a specified descriptor out of the committed state by attempting - * the transition from committed to reusable. Either this context or some + * Take a specified descriptor out of the finalized state by attempting + * the transition from finalized to reusable. Either this context or some * other context will have been successful. */ static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { - unsigned long val_committed = DESC_SV(id, desc_committed); + unsigned long val_finalized = DESC_SV(id, desc_finalized); unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; - atomic_long_cmpxchg_relaxed(state_var, val_committed, + atomic_long_cmpxchg_relaxed(state_var, val_finalized, val_reusable); /* LMM(desc_make_reusable:A) */ } @@ -484,7 +548,7 @@ static void desc_make_reusable(struct prb_desc_ring *desc_ring, * data block from @lpos_begin until @lpos_end into the reusable state. * * If there is any problem making the associated descriptor reusable, either - * the descriptor has not yet been committed or another writer context has + * the descriptor has not yet been finalized or another writer context has * already pushed the tail lpos past the problematic data block. Regardless, * on error the caller can re-load the tail lpos to determine the situation. */ @@ -528,10 +592,10 @@ static bool data_make_reusable(struct printk_ringbuffer *rb, switch (d_state) { case desc_miss: - return false; case desc_reserved: - return false; case desc_committed: + return false; + case desc_finalized: /* * This data block is invalid if the descriptor * does not point back to it. @@ -616,7 +680,7 @@ static bool data_push_tail(struct printk_ringbuffer *rb, * data_make_reusable() may be due to a newly * recycled data area causing the tail lpos to * have been previously pushed. This pairs with - * data_alloc:A. + * data_alloc:A and data_realloc:A. * * Memory barrier involvement: * @@ -729,8 +793,9 @@ static bool desc_push_tail(struct printk_ringbuffer *rb, */ return true; case desc_reserved: - return false; case desc_committed: + return false; + case desc_finalized: desc_make_reusable(desc_ring, tail_id); break; case desc_reusable: @@ -751,7 +816,7 @@ static bool desc_push_tail(struct printk_ringbuffer *rb, /* * Check the next descriptor after @tail_id before pushing the tail - * to it because the tail must always be in a committed or reusable + * to it because the tail must always be in a finalized or reusable * state. The implementation of prb_first_seq() relies on this. * * A successful read implies that the next descriptor is less than or @@ -760,7 +825,7 @@ static bool desc_push_tail(struct printk_ringbuffer *rb, */ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc); /* LMM(desc_push_tail:A) */ - if (d_state == desc_committed || d_state == desc_reusable) { + if (d_state == desc_finalized || d_state == desc_reusable) { /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail ID. This allows @@ -895,6 +960,10 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) * another CPU may have pushed the tail ID. This pairs * with desc_push_tail:C and this also pairs with * prb_first_seq:C. + * + * 5. Guarantee the head ID is stored before trying to + * finalize the previous descriptor. This pairs with + * _prb_commit:B. */ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, id)); /* LMM(desc_reserve:D) */ @@ -1024,6 +1093,84 @@ static char *data_alloc(struct printk_ringbuffer *rb, return &blk->data[0]; } +/* + * Try to resize an existing data block associated with the descriptor + * specified by @id. If the resized data block should become wrapped, it + * copies the old data to the new data block. If @size yields a data block + * with the same or less size, the data block is left as is. + * + * Fail if this is not the last allocated data block or if there is not + * enough space or it is not possible make enough space. + * + * Return a pointer to the beginning of the entire data buffer or NULL on + * failure. + */ +static char *data_realloc(struct printk_ringbuffer *rb, + struct prb_data_ring *data_ring, unsigned int size, + struct prb_data_blk_lpos *blk_lpos, unsigned long id) +{ + struct prb_data_block *blk; + unsigned long head_lpos; + unsigned long next_lpos; + bool wrapped; + + /* Reallocation only works if @blk_lpos is the newest data block. */ + head_lpos = atomic_long_read(&data_ring->head_lpos); + if (head_lpos != blk_lpos->next) + return NULL; + + /* Keep track if @blk_lpos was a wrapping data block. */ + wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); + + size = to_blk_size(size); + + next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); + + /* If the data block does not increase, there is nothing to do. */ + if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { + blk = to_block(data_ring, blk_lpos->begin); + return &blk->data[0]; + } + + if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) + return NULL; + + /* The memory barrier involvement is the same as data_alloc:A. */ + if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, + next_lpos)) { /* LMM(data_realloc:A) */ + return NULL; + } + + blk = to_block(data_ring, blk_lpos->begin); + + if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { + struct prb_data_block *old_blk = blk; + + /* Wrapping data blocks store their data at the beginning. */ + blk = to_block(data_ring, 0); + + /* + * Store the ID on the wrapped block for consistency. + * The printk_ringbuffer does not actually use it. + */ + blk->id = id; + + if (!wrapped) { + /* + * Since the allocated space is now in the newly + * created wrapping data block, copy the content + * from the old data block. + */ + memcpy(&blk->data[0], &old_blk->data[0], + (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); + } + } + + blk_lpos->next = next_lpos; + + return &blk->data[0]; +} + /* Return the number of bytes used by a data block. */ static unsigned int space_used(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos) @@ -1104,6 +1251,206 @@ static const char *get_data(struct prb_data_ring *data_ring, return &db->data[0]; } +/* + * Attempt to transition the newest descriptor from committed back to reserved + * so that the record can be modified by a writer again. This is only possible + * if the descriptor is not yet finalized and the provided @caller_id matches. + */ +static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, + u32 caller_id, unsigned long *id_out) +{ + unsigned long prev_state_val; + enum desc_state d_state; + struct prb_desc desc; + struct prb_desc *d; + unsigned long id; + + id = atomic_long_read(&desc_ring->head_id); + + /* + * To reduce unnecessarily reopening, first check if the descriptor + * state and caller ID are correct. + */ + d_state = desc_read(desc_ring, id, &desc); + if (d_state != desc_committed || desc.info.caller_id != caller_id) + return NULL; + + d = to_desc(desc_ring, id); + + prev_state_val = DESC_SV(id, desc_committed); + + /* + * Guarantee the reserved state is stored before reading any + * record data. A full memory barrier is needed because @state_var + * modification is followed by reading. This pairs with _prb_commit:B. + * + * Memory barrier involvement: + * + * If desc_reopen_last:A reads from _prb_commit:B, then + * prb_reserve_in_last:A reads from _prb_commit:A. + * + * Relies on: + * + * WMB from _prb_commit:A to _prb_commit:B + * matching + * MB If desc_reopen_last:A to prb_reserve_in_last:A + */ + if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, + DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ + return NULL; + } + + *id_out = id; + return d; +} + +/** + * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer + * used by the newest record. + * + * @e: The entry structure to setup. + * @rb: The ringbuffer to re-reserve and extend data in. + * @r: The record structure to allocate buffers for. + * @caller_id: The caller ID of the caller (reserving writer). + * + * This is the public function available to writers to re-reserve and extend + * data. + * + * The writer specifies the text size to extend (not the new total size) by + * setting the @text_buf_size field of @r. Extending dictionaries is not + * supported, so @dict_buf_size of @r should be set to 0. To ensure proper + * initialization of @r, prb_rec_init_wr() should be used. + * + * This function will fail if @caller_id does not match the caller ID of the + * newest record. In that case the caller must reserve new data using + * prb_reserve(). + * + * Context: Any context. Disables local interrupts on success. + * Return: true if text data could be extended, otherwise false. + * + * On success: + * + * - @r->text_buf points to the beginning of the entire text buffer. + * + * - @r->text_buf_size is set to the new total size of the buffer. + * + * - @r->dict_buf and @r->dict_buf_size are cleared because extending + * the dict buffer is not supported. + * + * - @r->info is not touched so that @r->info->text_len could be used + * to append the text. + * + * - prb_record_text_space() can be used on @e to query the new + * actually used space. + * + * Important: All @r->info fields will already be set with the current values + * for the record. I.e. @r->info->text_len will be less than + * @text_buf_size and @r->info->dict_len may be set, even though + * @dict_buf_size is 0. Writers can use @r->info->text_len to know + * where concatenation begins and writers should update + * @r->info->text_len after concatenating. + */ +bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, + struct printk_record *r, u32 caller_id) +{ + unsigned int data_size; + struct prb_desc *d; + unsigned long id; + + local_irq_save(e->irqflags); + + /* Transition the newest descriptor back to the reserved state. */ + d = desc_reopen_last(&rb->desc_ring, caller_id, &id); + if (!d) { + local_irq_restore(e->irqflags); + goto fail_reopen; + } + + /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ + + /* + * Set the @e fields here so that prb_commit() can be used if + * anything fails from now on. + */ + e->rb = rb; + e->id = id; + + /* + * desc_reopen_last() checked the caller_id, but there was no + * exclusive access at that point. The descriptor may have + * changed since then. + */ + if (caller_id != d->info.caller_id) + goto fail; + + if (BLK_DATALESS(&d->text_blk_lpos)) { + if (WARN_ON_ONCE(d->info.text_len != 0)) { + pr_warn_once("wrong text_len value (%hu, expecting 0)\n", + d->info.text_len); + d->info.text_len = 0; + } + + if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) + goto fail; + + r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, + &d->text_blk_lpos, id); + } else { + if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) + goto fail; + + /* + * Increase the buffer size to include the original size. If + * the meta data (@text_len) is not sane, use the full data + * block size. + */ + if (WARN_ON_ONCE(d->info.text_len > data_size)) { + pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", + d->info.text_len, data_size); + d->info.text_len = data_size; + } + r->text_buf_size += d->info.text_len; + + if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) + goto fail; + + r->text_buf = data_realloc(rb, &rb->text_data_ring, r->text_buf_size, + &d->text_blk_lpos, id); + } + if (r->text_buf_size && !r->text_buf) + goto fail; + + /* Although dictionary data may be in use, it cannot be extended. */ + r->dict_buf = NULL; + r->dict_buf_size = 0; + + r->info = &d->info; + + e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); + + return true; +fail: + prb_commit(e); + /* prb_commit() re-enabled interrupts. */ +fail_reopen: + /* Make it clear to the caller that the re-reserve failed. */ + memset(r, 0, sizeof(*r)); + return false; +} + +/* + * Attempt to finalize a specified descriptor. If this fails, the descriptor + * is either already final or it will finalize itself when the writer commits. + */ +static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) +{ + unsigned long prev_state_val = DESC_SV(id, desc_committed); + struct prb_desc *d = to_desc(desc_ring, id); + + atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, + DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ +} + /** * prb_reserve() - Reserve space in the ringbuffer. * @@ -1197,6 +1544,15 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, else d->info.seq = seq + DESCS_COUNT(desc_ring); + /* + * New data is about to be reserved. Once that happens, previous + * descriptors are no longer able to be extended. Finalize the + * previous descriptor now so that it can be made available to + * readers. (For seq==0 there is no previous descriptor.) + */ + if (d->info.seq > 0) + desc_make_final(desc_ring, DESC_ID(id - 1)); + r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ @@ -1227,33 +1583,40 @@ fail: return false; } -/** - * prb_commit() - Commit (previously reserved) data to the ringbuffer. - * - * @e: The entry containing the reserved data information. - * - * This is the public function available to writers to commit data. - * - * Context: Any context. Enables local interrupts. - */ -void prb_commit(struct prb_reserved_entry *e) +/* Commit the data (possibly finalizing it) and restore interrupts. */ +static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); - /* Now the writer has finished all writing: LMM(prb_commit:A) */ + /* Now the writer has finished all writing: LMM(_prb_commit:A) */ /* * Set the descriptor as committed. See "ABA Issues" about why * cmpxchg() instead of set() is used. * - * Guarantee all record data is stored before the descriptor state - * is stored as committed. A write memory barrier is sufficient for - * this. This pairs with desc_read:B. + * 1 Guarantee all record data is stored before the descriptor state + * is stored as committed. A write memory barrier is sufficient + * for this. This pairs with desc_read:B and desc_reopen_last:A. + * + * 2. Guarantee the descriptor state is stored as committed before + * re-checking the head ID in order to possibly finalize this + * descriptor. This pairs with desc_reserve:D. + * + * Memory barrier involvement: + * + * If prb_commit:A reads from desc_reserve:D, then + * desc_make_final:A reads from _prb_commit:B. + * + * Relies on: + * + * MB _prb_commit:B to prb_commit:A + * matching + * MB desc_reserve:D to desc_make_final:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, - DESC_SV(e->id, desc_committed))) { /* LMM(prb_commit:B) */ + DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } @@ -1261,6 +1624,59 @@ void prb_commit(struct prb_reserved_entry *e) local_irq_restore(e->irqflags); } +/** + * prb_commit() - Commit (previously reserved) data to the ringbuffer. + * + * @e: The entry containing the reserved data information. + * + * This is the public function available to writers to commit data. + * + * Note that the data is not yet available to readers until it is finalized. + * Finalizing happens automatically when space for the next record is + * reserved. + * + * See prb_final_commit() for a version of this function that finalizes + * immediately. + * + * Context: Any context. Enables local interrupts. + */ +void prb_commit(struct prb_reserved_entry *e) +{ + struct prb_desc_ring *desc_ring = &e->rb->desc_ring; + unsigned long head_id; + + _prb_commit(e, desc_committed); + + /* + * If this descriptor is no longer the head (i.e. a new record has + * been allocated), extending the data for this record is no longer + * allowed and therefore it must be finalized. + */ + head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ + if (head_id != e->id) + desc_make_final(desc_ring, e->id); +} + +/** + * prb_final_commit() - Commit and finalize (previously reserved) data to + * the ringbuffer. + * + * @e: The entry containing the reserved data information. + * + * This is the public function available to writers to commit+finalize data. + * + * By finalizing, the data is made immediately available to readers. + * + * This function should only be used if there are no intentions of extending + * this data using prb_reserve_in_last(). + * + * Context: Any context. Enables local interrupts. + */ +void prb_final_commit(struct prb_reserved_entry *e) +{ + _prb_commit(e, desc_finalized); +} + /* * Count the number of lines in provided text. All text has at least 1 line * (even if @text_size is 0). Each '\n' processed is counted as an additional @@ -1312,7 +1728,7 @@ static bool copy_data(struct prb_data_ring *data_ring, * because of the trailing alignment padding. */ if (WARN_ON_ONCE(data_size < (unsigned int)len)) { - pr_warn_once("wrong data size (%u, expecting %hu) for data: %.*s\n", + pr_warn_once("wrong data size (%u, expecting >=%hu) for data: %.*s\n", data_size, len, data_size, data); return false; } @@ -1333,16 +1749,16 @@ static bool copy_data(struct prb_data_ring *data_ring, /* * This is an extended version of desc_read(). It gets a copy of a specified - * descriptor. However, it also verifies that the record is committed and has + * descriptor. However, it also verifies that the record is finalized and has * the sequence number @seq. On success, 0 is returned. * * Error return values: - * -EINVAL: A committed record with sequence number @seq does not exist. - * -ENOENT: A committed record with sequence number @seq exists, but its data + * -EINVAL: A finalized record with sequence number @seq does not exist. + * -ENOENT: A finalized record with sequence number @seq exists, but its data * is not available. This is a valid record, so readers should * continue with the next record. */ -static int desc_read_committed_seq(struct prb_desc_ring *desc_ring, +static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, unsigned long id, u64 seq, struct prb_desc *desc_out) { @@ -1353,11 +1769,12 @@ static int desc_read_committed_seq(struct prb_desc_ring *desc_ring, /* * An unexpected @id (desc_miss) or @seq mismatch means the record - * does not exist. A descriptor in the reserved state means the - * record does not yet exist for the reader. + * does not exist. A descriptor in the reserved or committed state + * means the record does not yet exist for the reader. */ if (d_state == desc_miss || d_state == desc_reserved || + d_state == desc_committed || desc_out->info.seq != seq) { return -EINVAL; } @@ -1379,7 +1796,7 @@ static int desc_read_committed_seq(struct prb_desc_ring *desc_ring, * Copy the ringbuffer data from the record with @seq to the provided * @r buffer. On success, 0 is returned. * - * See desc_read_committed_seq() for error return values. + * See desc_read_finalized_seq() for error return values. */ static int prb_read(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r, unsigned int *line_count) @@ -1395,7 +1812,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq, id = DESC_ID(atomic_long_read(state_var)); /* Get a local copy of the correct descriptor (if available). */ - err = desc_read_committed_seq(desc_ring, id, seq, &desc); + err = desc_read_finalized_seq(desc_ring, id, seq, &desc); /* * If @r is NULL, the caller is only interested in the availability @@ -1425,8 +1842,8 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq, r->info->dict_len = 0; } - /* Ensure the record is still committed and has the same @seq. */ - return desc_read_committed_seq(desc_ring, id, seq, &desc); + /* Ensure the record is still finalized and has the same @seq. */ + return desc_read_finalized_seq(desc_ring, id, seq, &desc); } /* Get the sequence number of the tail descriptor. */ @@ -1444,9 +1861,9 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb) /* * This loop will not be infinite because the tail is - * _always_ in the committed or reusable state. + * _always_ in the finalized or reusable state. */ - if (d_state == desc_committed || d_state == desc_reusable) + if (d_state == desc_finalized || d_state == desc_reusable) break; /* @@ -1473,8 +1890,8 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb) } /* - * Non-blocking read of a record. Updates @seq to the last committed record - * (which may have no data). + * Non-blocking read of a record. Updates @seq to the last finalized record + * (which may have no data available). * * See the description of prb_read_valid() and prb_read_valid_info() * for details. @@ -1500,7 +1917,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, (*seq)++; } else { - /* Non-existent/non-committed record. Must stop. */ + /* Non-existent/non-finalized record. Must stop. */ return false; } } diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index a9d85a6727b1..853ea62dc5f2 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -116,7 +116,8 @@ struct prb_reserved_entry { enum desc_state { desc_miss = -1, /* ID mismatch (pseudo state) */ desc_reserved = 0x0, /* reserved, in use by writer */ - desc_committed = 0x1, /* committed by writer */ + desc_committed = 0x1, /* committed by writer, could get reopened */ + desc_finalized = 0x2, /* committed, no further modification allowed */ desc_reusable = 0x3, /* free, not yet used by any writer */ }; @@ -327,7 +328,10 @@ static inline void prb_rec_init_wr(struct printk_record *r, bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r); +bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, + struct printk_record *r, u32 caller_id); void prb_commit(struct prb_reserved_entry *e); +void prb_final_commit(struct prb_reserved_entry *e); void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int text_buf_size, diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index dd8c0b95063a..bce14de5f610 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -79,6 +79,7 @@ class LxDmesg(gdb.Command): # definitions from kernel/printk/printk_ringbuffer.h desc_committed = 1 + desc_finalized = 2 desc_sv_bits = utils.get_long_type().sizeof * 8 desc_flags_shift = desc_sv_bits - 2 desc_flags_mask = 3 << desc_flags_shift @@ -98,7 +99,7 @@ class LxDmesg(gdb.Command): # skip non-committed record state = 3 & (utils.read_u64(descs, desc_off + sv_off + counter_off) >> desc_flags_shift) - if state != desc_committed: + if state != desc_committed and state != desc_finalized: if did == head_id: break did = (did + 1) & desc_id_mask -- cgit v1.2.3 From 74caba7f2a0685575b3ee5330a118f5922485e02 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 21 Sep 2020 13:24:45 +0206 Subject: printk: move dictionary keys to dev_printk_info Dictionaries are only used for SUBSYSTEM and DEVICE properties. The current implementation stores the property names each time they are used. This requires more space than otherwise necessary. Also, because the dictionary entries are currently considered optional, it cannot be relied upon that they are always available, even if the writer wanted to store them. These issues will increase should new dictionary properties be introduced. Rather than storing the subsystem and device properties in the dict ring, introduce a struct dev_printk_info with separate fields to store only the property values. Embed this struct within the struct printk_info to provide guaranteed availability. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/87mu1jl6ne.fsf@jogness.linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 73 +++++------ drivers/base/core.c | 46 +++---- include/linux/dev_printk.h | 8 ++ include/linux/printk.h | 6 +- kernel/printk/internal.h | 4 +- kernel/printk/printk.c | 166 +++++++++++++------------- kernel/printk/printk_ringbuffer.h | 3 + kernel/printk/printk_safe.c | 2 +- scripts/gdb/linux/dmesg.py | 16 ++- 9 files changed, 164 insertions(+), 160 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 94fabb165abf..82aecdcae8a6 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -172,13 +172,13 @@ end define dump_record set var $desc = $arg0 - if ($argc > 1) - set var $prev_flags = $arg1 + set var $info = $arg1 + if ($argc > 2) + set var $prev_flags = $arg2 else set var $prev_flags = 0 end - set var $info = &$desc->info set var $prefix = 1 set var $newline = 1 @@ -237,44 +237,36 @@ define dump_record # handle dictionary data - set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) - set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) - - # handle data-less record - if ($begin & 1) - set var $dict_len = 0 - set var $dict = "" - else - # handle wrapping data block - if ($begin > $next) - set var $begin = 0 - end - - # skip over descriptor id - set var $begin = $begin + sizeof(long) - - # handle truncated message - if ($next - $begin < $info->dict_len) - set var $dict_len = $next - $begin - else - set var $dict_len = $info->dict_len + set var $dict = &$info->dev_info.subsystem[0] + set var $dict_len = sizeof($info->dev_info.subsystem) + if ($dict[0] != '\0') + printf " SUBSYSTEM=" + set var $idx = 0 + while ($idx < $dict_len) + set var $c = $dict[$idx] + if ($c == '\0') + loop_break + else + if ($c < ' ' || $c >= 127 || $c == '\\') + printf "\\x%02x", $c + else + printf "%c", $c + end + end + set var $idx = $idx + 1 end - - set var $dict = &prb->dict_data_ring.data[$begin] + printf "\n" end - if ($dict_len > 0) + set var $dict = &$info->dev_info.device[0] + set var $dict_len = sizeof($info->dev_info.device) + if ($dict[0] != '\0') + printf " DEVICE=" set var $idx = 0 - set var $line = 1 while ($idx < $dict_len) - if ($line) - printf " " - set var $line = 0 - end set var $c = $dict[$idx] if ($c == '\0') - printf "\n" - set var $line = 1 + loop_break else if ($c < ' ' || $c >= 127 || $c == '\\') printf "\\x%02x", $c @@ -288,10 +280,10 @@ define dump_record end end document dump_record - Dump a single record. The first parameter is the descriptor - sequence number, the second is optional and specifies the - previous record's flags, used for properly formatting - continued lines. + Dump a single record. The first parameter is the descriptor, + the second parameter is the info, the third parameter is + optional and specifies the previous record's flags, used for + properly formatting continued lines. end define dmesg @@ -311,12 +303,13 @@ define dmesg while (1) set var $desc = &prb->desc_ring.descs[$id % $desc_count] + set var $info = &prb->desc_ring.infos[$id % $desc_count] # skip non-committed record set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) if ($state == $desc_committed || $state == $desc_finalized) - dump_record $desc $prev_flags - set var $prev_flags = $desc->info.flags + dump_record $desc $info $prev_flags + set var $prev_flags = $info->flags end set var $id = ($id + 1) & $id_mask diff --git a/drivers/base/core.c b/drivers/base/core.c index 67d39a90b45c..22d83aedb64e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3815,22 +3815,21 @@ void device_shutdown(void) */ #ifdef CONFIG_PRINTK -static int -create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) +static void +set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) { const char *subsys; - size_t pos = 0; + + memset(dev_info, 0, sizeof(*dev_info)); if (dev->class) subsys = dev->class->name; else if (dev->bus) subsys = dev->bus->name; else - return 0; + return; - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); - if (pos >= hdrlen) - goto overflow; + strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); /* * Add device identifier DEVICE=: @@ -3846,41 +3845,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) c = 'b'; else c = 'c'; - pos++; - pos += snprintf(hdr + pos, hdrlen - pos, - "DEVICE=%c%u:%u", - c, MAJOR(dev->devt), MINOR(dev->devt)); + + snprintf(dev_info->device, sizeof(dev_info->device), + "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); } else if (strcmp(subsys, "net") == 0) { struct net_device *net = to_net_dev(dev); - pos++; - pos += snprintf(hdr + pos, hdrlen - pos, - "DEVICE=n%u", net->ifindex); + snprintf(dev_info->device, sizeof(dev_info->device), + "n%u", net->ifindex); } else { - pos++; - pos += snprintf(hdr + pos, hdrlen - pos, - "DEVICE=+%s:%s", subsys, dev_name(dev)); + snprintf(dev_info->device, sizeof(dev_info->device), + "+%s:%s", subsys, dev_name(dev)); } - - if (pos >= hdrlen) - goto overflow; - - return pos; - -overflow: - dev_WARN(dev, "device/subsystem name too long"); - return 0; } int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) { - char hdr[128]; - size_t hdrlen; + struct dev_printk_info dev_info; - hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); + set_dev_info(dev, &dev_info); - return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); + return vprintk_emit(0, level, &dev_info, fmt, args); } EXPORT_SYMBOL(dev_vprintk_emit); diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 3028b644b4fb..6f009559ee54 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -21,6 +21,14 @@ struct device; +#define PRINTK_INFO_SUBSYSTEM_LEN 16 +#define PRINTK_INFO_DEVICE_LEN 48 + +struct dev_printk_info { + char subsystem[PRINTK_INFO_SUBSYSTEM_LEN]; + char device[PRINTK_INFO_DEVICE_LEN]; +}; + #ifdef CONFIG_PRINTK __printf(3, 0) __cold diff --git a/include/linux/printk.h b/include/linux/printk.h index fc8f03c54543..071500ee7281 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -158,10 +158,12 @@ static inline void printk_nmi_direct_enter(void) { } static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ +struct dev_printk_info; + #ifdef CONFIG_PRINTK -asmlinkage __printf(5, 0) +asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, - const char *dict, size_t dictlen, + const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 660f9a6bf73a..3a8fd491758c 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -14,9 +14,9 @@ extern raw_spinlock_t logbuf_lock; -__printf(5, 0) +__printf(4, 0) int vprintk_store(int facility, int level, - const char *dict, size_t dictlen, + const struct dev_printk_info *dev_info, const char *fmt, va_list args); __printf(1, 0) int vprintk_default(const char *fmt, va_list args); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 25cfe4fe48af..f1e243cc284a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -296,8 +296,8 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* * The printk log buffer consists of a sequenced collection of records, each - * containing variable length message and dictionary text. Every record - * also contains its own meta-data (@info). + * containing variable length message text. Every record also contains its + * own meta-data (@info). * * Every record meta-data carries the timestamp in microseconds, as well as * the standard userspace syslog level and syslog facility. The usual kernel @@ -310,9 +310,7 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; * terminated. * * Optionally, a record can carry a dictionary of properties (key/value - * pairs), to provide userspace with a machine-readable message context. The - * length of the dictionary is available in @dict_len. The dictionary is not - * terminated. + * pairs), to provide userspace with a machine-readable message context. * * Examples for well-defined, commonly used property names are: * DEVICE=b12:8 device identifier @@ -322,21 +320,20 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; * +sound:card0 subsystem:devname * SUBSYSTEM=pci driver-core subsystem name * - * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value - * follows directly after a '=' character. Every property is terminated by - * a '\0' character. The last property is not terminated. + * Valid characters in property names are [a-zA-Z0-9.-_]. Property names + * and values are terminated by a '\0' character. * * Example of record values: - * record.text_buf = "it's a line" (unterminated) - * record.dict_buf = "DEVICE=b8:2\0DRIVER=bug" (unterminated) - * record.info.seq = 56 - * record.info.ts_nsec = 36863 - * record.info.text_len = 11 - * record.info.dict_len = 22 - * record.info.facility = 0 (LOG_KERN) - * record.info.flags = 0 - * record.info.level = 3 (LOG_ERR) - * record.info.caller_id = 299 (task 299) + * record.text_buf = "it's a line" (unterminated) + * record.info.seq = 56 + * record.info.ts_nsec = 36863 + * record.info.text_len = 11 + * record.info.facility = 0 (LOG_KERN) + * record.info.flags = 0 + * record.info.level = 3 (LOG_ERR) + * record.info.caller_id = 299 (task 299) + * record.info.dev_info.subsystem = "pci" (terminated) + * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) * * The 'struct printk_info' buffer must never be directly exported to * userspace, it is a kernel-private implementation detail that might @@ -498,19 +495,19 @@ static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) /* insert record into the buffer, discard old ones, update heads */ static int log_store(u32 caller_id, int facility, int level, enum log_flags flags, u64 ts_nsec, - const char *dict, u16 dict_len, + const struct dev_printk_info *dev_info, const char *text, u16 text_len) { struct prb_reserved_entry e; struct printk_record r; u16 trunc_msg_len = 0; - prb_rec_init_wr(&r, text_len, dict_len); + prb_rec_init_wr(&r, text_len, 0); if (!prb_reserve(&e, prb, &r)) { /* truncate the message if it is too long for empty buffer */ truncate_msg(&text_len, &trunc_msg_len); - prb_rec_init_wr(&r, text_len + trunc_msg_len, dict_len); + prb_rec_init_wr(&r, text_len + trunc_msg_len, 0); /* survive when the log buffer is too small for trunc_msg */ if (!prb_reserve(&e, prb, &r)) return 0; @@ -521,10 +518,6 @@ static int log_store(u32 caller_id, int facility, int level, if (trunc_msg_len) memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); r.info->text_len = text_len + trunc_msg_len; - if (r.dict_buf) { - memcpy(&r.dict_buf[0], dict, dict_len); - r.info->dict_len = dict_len; - } r.info->facility = facility; r.info->level = level & 7; r.info->flags = flags & 0x1f; @@ -533,6 +526,8 @@ static int log_store(u32 caller_id, int facility, int level, else r.info->ts_nsec = local_clock(); r.info->caller_id = caller_id; + if (dev_info) + memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* insert message */ if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE)) @@ -613,9 +608,9 @@ static ssize_t info_print_ext_header(char *buf, size_t size, ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); } -static ssize_t msg_print_ext_body(char *buf, size_t size, - char *dict, size_t dict_len, - char *text, size_t text_len) +static ssize_t msg_add_ext_text(char *buf, size_t size, + const char *text, size_t text_len, + unsigned char endc) { char *p = buf, *e = buf + size; size_t i; @@ -629,36 +624,44 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, else append_char(&p, e, c); } - append_char(&p, e, '\n'); + append_char(&p, e, endc); - if (dict_len) { - bool line = true; + return p - buf; +} - for (i = 0; i < dict_len; i++) { - unsigned char c = dict[i]; +static ssize_t msg_add_dict_text(char *buf, size_t size, + const char *key, const char *val) +{ + size_t val_len = strlen(val); + ssize_t len; - if (line) { - append_char(&p, e, ' '); - line = false; - } + if (!val_len) + return 0; - if (c == '\0') { - append_char(&p, e, '\n'); - line = true; - continue; - } + len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ + len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); + len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); - if (c < ' ' || c >= 127 || c == '\\') { - p += scnprintf(p, e - p, "\\x%02x", c); - continue; - } + return len; +} - append_char(&p, e, c); - } - append_char(&p, e, '\n'); - } +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *text, size_t text_len, + struct dev_printk_info *dev_info) +{ + ssize_t len; - return p - buf; + len = msg_add_ext_text(buf, size, text, text_len, '\n'); + + if (!dev_info) + goto out; + + len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", + dev_info->subsystem); + len += msg_add_dict_text(buf + len, size - len, "DEVICE", + dev_info->device); +out: + return len; } /* /dev/kmsg - userspace message inject/listen interface */ @@ -670,7 +673,6 @@ struct devkmsg_user { struct printk_info info; char text_buf[CONSOLE_EXT_LOG_MAX]; - char dict_buf[CONSOLE_EXT_LOG_MAX]; struct printk_record record; }; @@ -681,7 +683,7 @@ int devkmsg_emit(int facility, int level, const char *fmt, ...) int r; va_start(args, fmt); - r = vprintk_emit(facility, level, NULL, 0, fmt, args); + r = vprintk_emit(facility, level, NULL, fmt, args); va_end(args); return r; @@ -791,8 +793,8 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, - &r->dict_buf[0], r->info->dict_len, - &r->text_buf[0], r->info->text_len); + &r->text_buf[0], r->info->text_len, + &r->info->dev_info); user->seq = r->info->seq + 1; logbuf_unlock_irq(); @@ -897,7 +899,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) prb_rec_init_rd(&user->record, &user->info, &user->text_buf[0], sizeof(user->text_buf), - &user->dict_buf[0], sizeof(user->dict_buf)); + NULL, 0); logbuf_lock_irq(); user->seq = prb_first_valid_seq(prb); @@ -941,6 +943,8 @@ const struct file_operations kmsg_fops = { */ void log_buf_vmcoreinfo_setup(void) { + struct dev_printk_info *dev_info = NULL; + VMCOREINFO_SYMBOL(prb); VMCOREINFO_SYMBOL(printk_rb_static); VMCOREINFO_SYMBOL(clear_seq); @@ -978,6 +982,13 @@ void log_buf_vmcoreinfo_setup(void) VMCOREINFO_OFFSET(printk_info, text_len); VMCOREINFO_OFFSET(printk_info, dict_len); VMCOREINFO_OFFSET(printk_info, caller_id); + VMCOREINFO_OFFSET(printk_info, dev_info); + + VMCOREINFO_STRUCT_SIZE(dev_printk_info); + VMCOREINFO_OFFSET(dev_printk_info, subsystem); + VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); + VMCOREINFO_OFFSET(dev_printk_info, device); + VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); VMCOREINFO_STRUCT_SIZE(prb_data_ring); VMCOREINFO_OFFSET(prb_data_ring, size_bits); @@ -1070,22 +1081,19 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, struct prb_reserved_entry e; struct printk_record dest_r; - prb_rec_init_wr(&dest_r, r->info->text_len, r->info->dict_len); + prb_rec_init_wr(&dest_r, r->info->text_len, 0); if (!prb_reserve(&e, rb, &dest_r)) return 0; memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); dest_r.info->text_len = r->info->text_len; - if (dest_r.dict_buf) { - memcpy(&dest_r.dict_buf[0], &r->dict_buf[0], r->info->dict_len); - dest_r.info->dict_len = r->info->dict_len; - } dest_r.info->facility = r->info->facility; dest_r.info->level = r->info->level; dest_r.info->flags = r->info->flags; dest_r.info->ts_nsec = r->info->ts_nsec; dest_r.info->caller_id = r->info->caller_id; + memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); prb_final_commit(&e); @@ -1093,7 +1101,6 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, } static char setup_text_buf[CONSOLE_EXT_LOG_MAX] __initdata; -static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] __initdata; void __init setup_log_buf(int early) { @@ -1165,7 +1172,7 @@ void __init setup_log_buf(int early) prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf), - &setup_dict_buf[0], sizeof(setup_dict_buf)); + NULL, 0); prb_init(&printk_rb_dynamic, new_log_buf, ilog2(new_log_buf_len), @@ -1903,7 +1910,9 @@ static inline u32 printk_caller_id(void) 0x80000000 + raw_smp_processor_id(); } -static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) +static size_t log_output(int facility, int level, enum log_flags lflags, + const struct dev_printk_info *dev_info, + char *text, size_t text_len) { const u32 caller_id = printk_caller_id(); @@ -1927,12 +1936,12 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c /* Store it in the record log */ return log_store(caller_id, facility, level, lflags, 0, - dict, dictlen, text, text_len); + dev_info, text, text_len); } /* Must be called under logbuf_lock. */ int vprintk_store(int facility, int level, - const char *dict, size_t dictlen, + const struct dev_printk_info *dev_info, const char *fmt, va_list args) { static char textbuf[LOG_LINE_MAX]; @@ -1974,15 +1983,14 @@ int vprintk_store(int facility, int level, if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; - if (dict) + if (dev_info) lflags |= LOG_NEWLINE; - return log_output(facility, level, lflags, - dict, dictlen, text, text_len); + return log_output(facility, level, lflags, dev_info, text, text_len); } asmlinkage int vprintk_emit(int facility, int level, - const char *dict, size_t dictlen, + const struct dev_printk_info *dev_info, const char *fmt, va_list args) { int printed_len; @@ -2003,7 +2011,7 @@ asmlinkage int vprintk_emit(int facility, int level, /* This stops the holder of console_sem just where we want him */ logbuf_lock_irqsave(flags); - printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); + printed_len = vprintk_store(facility, level, dev_info, fmt, args); logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ @@ -2037,7 +2045,7 @@ EXPORT_SYMBOL(vprintk); int vprintk_default(const char *fmt, va_list args) { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); } EXPORT_SYMBOL_GPL(vprintk_default); @@ -2100,8 +2108,8 @@ static ssize_t info_print_ext_header(char *buf, size_t size, return 0; } static ssize_t msg_print_ext_body(char *buf, size_t size, - char *dict, size_t dict_len, - char *text, size_t text_len) { return 0; } + char *text, size_t text_len, + struct dev_printk_info *dev_info) { return 0; } static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_drivers(const char *ext_text, size_t ext_len, @@ -2390,7 +2398,6 @@ void console_unlock(void) { static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[LOG_LINE_MAX + PREFIX_MAX]; - static char dict[LOG_LINE_MAX]; unsigned long flags; bool do_cond_resched, retry; struct printk_info info; @@ -2401,7 +2408,7 @@ void console_unlock(void) return; } - prb_rec_init_rd(&r, &info, text, sizeof(text), dict, sizeof(dict)); + prb_rec_init_rd(&r, &info, text, sizeof(text), NULL, 0); /* * Console drivers are called with interrupts disabled, so @@ -2473,10 +2480,9 @@ skip: r.info); ext_len += msg_print_ext_body(ext_text + ext_len, sizeof(ext_text) - ext_len, - &r.dict_buf[0], - r.info->dict_len, &r.text_buf[0], - r.info->text_len); + r.info->text_len, + &r.info->dev_info); } len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, @@ -3055,7 +3061,7 @@ int vprintk_deferred(const char *fmt, va_list args) { int r; - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); defer_console_output(); return r; diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h index 97c8561e74e0..480499ce3c6b 100644 --- a/kernel/printk/printk_ringbuffer.h +++ b/kernel/printk/printk_ringbuffer.h @@ -4,6 +4,7 @@ #define _KERNEL_PRINTK_RINGBUFFER_H #include +#include /* * Meta information about each stored message. @@ -21,6 +22,8 @@ struct printk_info { u8 flags:5; /* internal record flags */ u8 level:3; /* syslog level */ u32 caller_id; /* thread id or processor id */ + + struct dev_printk_info dev_info; }; /* diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 50aeae770434..5dbc40160990 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -375,7 +375,7 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) raw_spin_trylock(&logbuf_lock)) { int len; - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); raw_spin_unlock(&logbuf_lock); defer_console_output(); return len; diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index bce14de5f610..a92c55bd8de5 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -52,6 +52,12 @@ class LxDmesg(gdb.Command): addr = utils.read_ulong(desc_ring, off) descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() + # read in info array + info_sz = printk_info_type.get_type().sizeof + off = prb_desc_ring_type.get_type()['infos'].bitpos // 8 + addr = utils.read_ulong(desc_ring, off) + infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes() + # read in text data ring structure off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 addr = prb_addr + off @@ -73,9 +79,8 @@ class LxDmesg(gdb.Command): begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) - off = prb_desc_type.get_type()['info'].bitpos // 8 - ts_off = off + printk_info_type.get_type()['ts_nsec'].bitpos // 8 - len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 + ts_off = printk_info_type.get_type()['ts_nsec'].bitpos // 8 + len_off = printk_info_type.get_type()['text_len'].bitpos // 8 # definitions from kernel/printk/printk_ringbuffer.h desc_committed = 1 @@ -95,6 +100,7 @@ class LxDmesg(gdb.Command): while True: ind = did % desc_ring_count desc_off = desc_sz * ind + info_off = info_sz * ind # skip non-committed record state = 3 & (utils.read_u64(descs, desc_off + sv_off + @@ -119,7 +125,7 @@ class LxDmesg(gdb.Command): # skip over descriptor id text_start = begin + utils.get_long_type().sizeof - text_len = utils.read_u16(descs, desc_off + len_off) + text_len = utils.read_u16(infos, info_off + len_off) # handle truncated message if end - text_start < text_len: @@ -128,7 +134,7 @@ class LxDmesg(gdb.Command): text = text_data[text_start:text_start + text_len].decode( encoding='utf8', errors='replace') - time_stamp = utils.read_u64(descs, desc_off + ts_off) + time_stamp = utils.read_u64(infos, info_off + ts_off) for line in text.splitlines(): msg = u"[{time:12.6f}] {line}\n".format( -- cgit v1.2.3