From 8ee0d2fb4dfa3465ea2030dec59a6f6fe3005804 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Feb 2023 14:00:54 +0100 Subject: s390/setup: do not complain about parameters handled in decompressor Currently there are several kernel command line parameters which are only parsed and handled in decompressor and not known to the kernel. This leads to the following error message during kernel boot: Unknown kernel command line parameters "mem=3G nokaslr", will be passed to user space. To avoid confusion, register those parameters with an empty stub so that kernel does not complain about them. Reported-by: Gerald Schaefer Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/early.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 59eba19ae0f2..d26f02495636 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -36,6 +36,23 @@ int __bootdata(is_full_image); +#define decompressor_handled_param(param) \ +static int __init ignore_decompressor_param_##param(char *s) \ +{ \ + return 0; \ +} \ +early_param(#param, ignore_decompressor_param_##param) + +decompressor_handled_param(mem); +decompressor_handled_param(vmalloc); +decompressor_handled_param(dfltcc); +decompressor_handled_param(noexec); +decompressor_handled_param(facilities); +decompressor_handled_param(nokaslr); +#if IS_ENABLED(CONFIG_KVM) +decompressor_handled_param(prot_virt); +#endif + static void __init reset_tod_clock(void) { union tod_clock clk; -- cgit v1.2.3 From ae4b60f6b7a8d25c7253cab104468d22efcecf1a Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 16 Feb 2023 13:12:08 +0100 Subject: s390/nmi: fix virtual-physical address confusion When a machine check is received while in SIE, it is reinjected into the guest in some cases. The respective code needs to access the sie_block, which is taken from the backed up R14. Since reinjection only occurs while we are in SIE (i.e. between the labels sie_entry and sie_leave in entry.S and thus if CIF_MCCK_GUEST is set), the backed up R14 will always contain a physical address in s390_backup_mcck_info. This currently works, because virtual and physical addresses are the same. Add phys_to_virt() to resolve the virtual-physical confusion. Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Link: https://lore.kernel.org/r/20230216121208.4390-2-nrb@linux.ibm.com Signed-off-by: Janosch Frank Signed-off-by: Heiko Carstens --- arch/s390/kernel/nmi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 5dbf274719a9..56d9c559afa1 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -346,8 +346,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs) struct sie_page *sie_page; /* r14 contains the sie block, which was set in sie64a */ - struct kvm_s390_sie_block *sie_block = - (struct kvm_s390_sie_block *) regs->gprs[14]; + struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]); if (sie_block == NULL) /* Something's seriously wrong, stop system. */ -- cgit v1.2.3 From ebf95e884694b2c796ecb53d80d2b4cff8990d2f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Feb 2023 12:05:36 +0100 Subject: s390/ap,zcrypt,vfio: introduce and use ap_queue_status_reg union Introduce a new ap queue status register wrapper union to access register wide values. So the inline assembler only sees register wide values but the surrounding code may use a more structured view of the same value and a reader of the code (and the compiler) gets a clear understanding about the mapping between fields and register values. All the changes to access the ap queue status are local to the inline functions within ap.h. However, the struct ap_qirq_ctrl has been replaces by a union for same reason and this needed slight adaptions in the calling code. Suggested-by: Halil Pasic Suggested-by: Andreas Arnez Signed-off-by: Harald Freudenberger Acked-by: Heiko Carstens Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 100 ++++++++++++++++++++------------------ drivers/s390/crypto/ap_queue.c | 2 +- drivers/s390/crypto/vfio_ap_ops.c | 4 +- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 57a2d6518d27..c699f251a464 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -49,6 +49,19 @@ struct ap_queue_status { unsigned int _pad2 : 16; }; +/* + * AP queue status reg union to access the reg1 + * register with the lower 32 bits comprising the + * ap queue status. + */ +union ap_queue_status_reg { + unsigned long value; + struct { + u32 _pad; + struct ap_queue_status status; + }; +}; + /** * ap_intructions_available() - Test if AP instructions are available. * @@ -82,7 +95,7 @@ static inline bool ap_instructions_available(void) */ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) { - struct ap_queue_status reg1; + union ap_queue_status_reg reg1; unsigned long reg2; asm volatile( @@ -91,12 +104,12 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ " lgr %[reg2],2\n" /* gr2 into reg2 */ - : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2) + : [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2) : [qid] "d" (qid) : "cc", "0", "1", "2"); if (info) *info = reg2; - return reg1; + return reg1.status; } /** @@ -125,16 +138,16 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, static inline struct ap_queue_status ap_rapq(ap_qid_t qid) { unsigned long reg0 = qid | (1UL << 24); /* fc 1UL is RAPQ */ - struct ap_queue_status reg1; + union ap_queue_status_reg reg1; asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - : [reg1] "=&d" (reg1) + : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0) : "cc", "0", "1"); - return reg1; + return reg1.status; } /** @@ -146,16 +159,16 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) static inline struct ap_queue_status ap_zapq(ap_qid_t qid) { unsigned long reg0 = qid | (2UL << 24); /* fc 2UL is ZAPQ */ - struct ap_queue_status reg1; + union ap_queue_status_reg reg1; asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - : [reg1] "=&d" (reg1) + : [reg1] "=&d" (reg1.value) : [reg0] "d" (reg0) : "cc", "0", "1"); - return reg1; + return reg1.status; } /** @@ -209,18 +222,21 @@ static inline int ap_qci(struct ap_config_info *config) * parameter to the PQAP(AQIC) instruction. For details please * see the AR documentation. */ -struct ap_qirq_ctrl { - unsigned int _res1 : 8; - unsigned int zone : 8; /* zone info */ - unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ - unsigned int _res2 : 4; - unsigned int gisc : 3; /* guest isc field */ - unsigned int _res3 : 6; - unsigned int gf : 2; /* gisa format */ - unsigned int _res4 : 1; - unsigned int gisa : 27; /* gisa origin */ - unsigned int _res5 : 1; - unsigned int isc : 3; /* irq sub class */ +union ap_qirq_ctrl { + unsigned long value; + struct { + unsigned int : 8; + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int : 4; + unsigned int gisc : 3; /* guest isc field */ + unsigned int : 6; + unsigned int gf : 2; /* gisa format */ + unsigned int : 1; + unsigned int gisa : 27; /* gisa origin */ + unsigned int : 1; + unsigned int isc : 3; /* irq sub class */ + }; }; /** @@ -232,21 +248,14 @@ struct ap_qirq_ctrl { * Returns AP queue status. */ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, + union ap_qirq_ctrl qirqctrl, phys_addr_t pa_ind) { unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */ - union { - unsigned long value; - struct ap_qirq_ctrl qirqctrl; - struct { - u32 _pad; - struct ap_queue_status status; - }; - } reg1; + union ap_queue_status_reg reg1; unsigned long reg2 = pa_ind; - reg1.qirqctrl = qirqctrl; + reg1.value = qirqctrl.value; asm volatile( " lgr 0,%[reg0]\n" /* qid param into gr0 */ @@ -254,7 +263,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, " lgr 2,%[reg2]\n" /* ni addr into gr2 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - : [reg1] "+&d" (reg1) + : [reg1] "+&d" (reg1.value) : [reg0] "d" (reg0), [reg2] "d" (reg2) : "cc", "memory", "0", "1", "2"); @@ -291,13 +300,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, union ap_qact_ap_info *apinfo) { unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22); - union { - unsigned long value; - struct { - u32 _pad; - struct ap_queue_status status; - }; - } reg1; + union ap_queue_status_reg reg1; unsigned long reg2; reg1.value = apinfo->val; @@ -308,7 +311,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, " .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ " lgr %[reg2],2\n" /* qact out info into reg2 */ - : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2) + : [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2) : [reg0] "d" (reg0) : "cc", "0", "1", "2"); apinfo->val = reg2; @@ -333,7 +336,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, { unsigned long reg0 = qid | 0x40000000UL; /* 0x4... is last msg part */ union register_pair nqap_r1, nqap_r2; - struct ap_queue_status reg1; + union ap_queue_status_reg reg1; nqap_r1.even = (unsigned int)(psmid >> 32); nqap_r1.odd = psmid & 0xffffffff; @@ -345,11 +348,11 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, "0: .insn rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n" " brc 2,0b\n" /* handle partial completion */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value), [nqap_r2] "+&d" (nqap_r2.pair) : [nqap_r1] "d" (nqap_r1.pair) : "cc", "memory", "0", "1"); - return reg1; + return reg1.status; } /** @@ -389,7 +392,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, unsigned long *resgr0) { unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL; - struct ap_queue_status reg1; + union ap_queue_status_reg reg1; unsigned long reg2; union register_pair rp1, rp2; @@ -408,8 +411,9 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, "2: lgr %[reg0],0\n" /* gr0 (qid + info) into reg0 */ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ " lgr %[reg2],2\n" /* gr2 (res length) into reg2 */ - : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2), - [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair) + : [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value), + [reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair), + [rp2] "+&d" (rp2.pair) : : "cc", "memory", "0", "1", "2"); @@ -421,7 +425,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, * Signal the caller that this dqap is only partially received * with a special status response code 0xFF and *resgr0 updated */ - reg1.response_code = 0xFF; + reg1.status.response_code = 0xFF; if (resgr0) *resgr0 = reg0; } else { @@ -430,7 +434,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, *resgr0 = 0; } - return reg1; + return reg1.status; } /* diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index a32457b4cbb8..2637fe1df727 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -29,8 +29,8 @@ static void __ap_flush_queue(struct ap_queue *aq); */ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) { + union ap_qirq_ctrl qirqctrl = { .value = 0 }; struct ap_queue_status status; - struct ap_qirq_ctrl qirqctrl = { 0 }; qirqctrl.ir = 1; qirqctrl.isc = AP_ISC; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 28a36e016ea9..72e10abb103a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -301,7 +301,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) */ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) { - struct ap_qirq_ctrl aqic_gisa = {}; + union ap_qirq_ctrl aqic_gisa = { .value = 0 }; struct ap_queue_status status; int retries = 5; @@ -384,7 +384,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, struct kvm_vcpu *vcpu) { - struct ap_qirq_ctrl aqic_gisa = {}; + union ap_qirq_ctrl aqic_gisa = { .value = 0 }; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; struct page *h_page; -- cgit v1.2.3 From 434b26605f6cc500c1a995587e5c4bc4bc1693c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2023 11:02:36 +0100 Subject: s390/rwonce: add READ_ONCE_ALIGNED_128() macro Add an s390 specific READ_ONCE_ALIGNED_128() helper, which can be used for fast block concurrent (atomic) 128-bit accesses. The used lpq instruction requires 128-bit alignment. This is also the reason why the compiler doesn't emit this instruction if __READ_ONCE() is used for 128-bit accesses. Link: https://lore.kernel.org/r/20230224100237.3247871-2-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/rwonce.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 arch/s390/include/asm/rwonce.h diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h new file mode 100644 index 000000000000..91fc24520e82 --- /dev/null +++ b/arch/s390/include/asm/rwonce.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_S390_RWONCE_H +#define __ASM_S390_RWONCE_H + +#include + +/* + * Use READ_ONCE_ALIGNED_128() for 128-bit block concurrent (atomic) read + * accesses. Note that x must be 128-bit aligned, otherwise a specification + * exception is generated. + */ +#define READ_ONCE_ALIGNED_128(x) \ +({ \ + union { \ + typeof(x) __x; \ + __uint128_t val; \ + } __u; \ + \ + BUILD_BUG_ON(sizeof(x) != 16); \ + asm volatile( \ + " lpq %[val],%[_x]\n" \ + : [val] "=d" (__u.val) \ + : [_x] "QS" (x) \ + : "memory"); \ + __u.__x; \ +}) + +#include + +#endif /* __ASM_S390_RWONCE_H */ -- cgit v1.2.3 From 5e02c74905cb00184b6c5ae70b1c1bfae5b3bd17 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2023 11:02:37 +0100 Subject: s390/cpum_sf: use READ_ONCE_ALIGNED_128() instead of 128-bit cmpxchg Use READ_ONCE_ALIGNED_128() to read the previous value in front of a 128-bit cmpxchg loop, instead of (mis-)using a 128-bit cmpxchg operation to do the same. This makes the code more readable and is faster. Link: https://lore.kernel.org/r/20230224100237.3247871-3-hca@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_sf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 79904a839fb9..e7b867e2f73f 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1355,8 +1355,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ - /* READ_ONCE() 16 byte header */ - prev.val = __cdsg(&te->header.val, 0, 0); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { old.val = prev.val; new.val = prev.val; @@ -1558,8 +1557,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); - /* READ_ONCE() 16 byte header */ - prev.val = __cdsg(&te->header.val, 0, 0); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { old.val = prev.val; new.val = prev.val; @@ -1637,8 +1635,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - /* READ_ONCE() 16 byte header */ - prev.val = __cdsg(&te->header.val, 0, 0); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { old.val = prev.val; new.val = prev.val; -- cgit v1.2.3 From e7ec1d2eac9cad57ff615ef6cc3e324ab7238b82 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 17 Dec 2022 11:01:16 +0100 Subject: s390/mcck: cleanup user process termination path If a machine check interrupt hits while user process is running __s390_handle_mcck() helper function is called directly from the interrupt handler and terminates the current process by calling make_task_dead() routine. The make_task_dead() is not allowed to be called from interrupt context which forces the machine check handler switch to the kernel stack and enable local interrupts first. The __s390_handle_mcck() could also be called to service pending work, but this time from the external interrupts handler. It is the machine check handler that establishes the work and schedules the external interrupt, therefore the machine check interrupt itself should be disabled while reading out the corresponding variable: local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); local_mcck_enable(); However, local_mcck_disable() does not have effect when __s390_handle_mcck() is called directly form the machine check handler, since the machine check interrupt is still disabled. Therefore, it is not the opening bracket to the following local_mcck_enable() function. Simplify the user process termination flow by scheduling the external interrupt and killing the affected process from the interrupt context. Assume a kernel-generated signal is always delivered and ignore a value returned by do_send_sig_info() funciton. Reviewed-by: Heiko Carstens Reviewed-by: Sven Schnelle Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/include/asm/nmi.h | 5 ++--- arch/s390/kernel/entry.S | 10 ---------- arch/s390/kernel/nmi.c | 23 +++++------------------ arch/s390/kernel/smp.c | 2 +- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index af1cd3a6f406..227466ce9e41 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -101,9 +101,8 @@ void nmi_alloc_mcesa_early(u64 *mcesad); int nmi_alloc_mcesa(u64 *mcesad); void nmi_free_mcesa(u64 *mcesad); -void s390_handle_mcck(struct pt_regs *regs); -void __s390_handle_mcck(void); -int s390_do_machine_check(struct pt_regs *regs); +void s390_handle_mcck(void); +void s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c8d8c9960936..76a06f3d3671 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -562,16 +562,6 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - cghi %r2,0 - je .Lmcck_return - lg %r1,__LC_KERNEL_STACK # switch to kernel stack - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r2,%r11 - lgr %r15,%r1 - brasl %r14,s390_handle_mcck -.Lmcck_return: lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 56d9c559afa1..38ec0487521c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -156,7 +156,7 @@ NOKPROBE_SYMBOL(s390_handle_damage); * Main machine check handler function. Will be called with interrupts disabled * and machine checks enabled. */ -void __s390_handle_mcck(void) +void s390_handle_mcck(void) { struct mcck_struct mcck; @@ -192,23 +192,16 @@ void __s390_handle_mcck(void) if (mcck.stp_queue) stp_queue_work(); if (mcck.kill_task) { - local_irq_enable(); printk(KERN_EMERG "mcck: Terminating task because of machine " "malfunction (code 0x%016lx).\n", mcck.mcck_code); printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", current->comm, current->pid); - make_task_dead(SIGSEGV); + if (is_global_init(current)) + panic("mcck: Attempting to kill init!\n"); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID); } } -void noinstr s390_handle_mcck(struct pt_regs *regs) -{ - trace_hardirqs_off(); - pai_kernel_enter(regs); - __s390_handle_mcck(); - pai_kernel_exit(regs); - trace_hardirqs_on(); -} /* * returns 0 if register contents could be validated * returns 1 otherwise @@ -373,7 +366,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info); /* * machine check handler. */ -int notrace s390_do_machine_check(struct pt_regs *regs) +void notrace s390_do_machine_check(struct pt_regs *regs) { static int ipd_count; static DEFINE_SPINLOCK(ipd_lock); @@ -503,16 +496,10 @@ int notrace s390_do_machine_check(struct pt_regs *regs) } clear_cpu_flag(CIF_MCCK_GUEST); - if (user_mode(regs) && mcck_pending) { - irqentry_nmi_exit(regs, irq_state); - return 1; - } - if (mcck_pending) schedule_mcck_handler(); irqentry_nmi_exit(regs, irq_state); - return 0; } NOKPROBE_SYMBOL(s390_do_machine_check); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 23c427284773..97961522b317 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -522,7 +522,7 @@ static void smp_handle_ext_call(void) if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); if (test_bit(ec_mcck_pending, &bits)) - __s390_handle_mcck(); + s390_handle_mcck(); if (test_bit(ec_irq_work, &bits)) irq_work_run(); } -- cgit v1.2.3 From e688c6255b742428ea8fa7e4fb8181a6135205e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Feb 2023 18:56:10 +0100 Subject: s390/smp: perform cpu reset before delegating work to target cpu Clear CPU state (e.g. all TLB entries, prefetched instructions, etc.) of the target CPU, however without clearing register contents before starting any work on it. This puts the target CPU in a more defined state compared to the current Stop + Restart sigp orders. Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 97961522b317..d4888453bbf8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -333,6 +333,7 @@ static void pcpu_delegate(struct pcpu *pcpu, } /* Stop target cpu (if func returns this stops the current cpu). */ pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0); /* Restart func on the target cpu and stop the current cpu. */ if (lc) { lc->restart_stack = stack; -- cgit v1.2.3 From 9b5c37bbf659fe4edb4804bc0aa99840d6798878 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 Sep 2020 13:00:18 +0200 Subject: s390/decompressor: add link map saving Produce arch/s390/boot/vmlinux.map link map for the decompressor, when CONFIG_VMLINUX_MAP option is enabled. Link map is quite useful during making kernel changes related to how the decompressor is composed and debugging linker scripts. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 47a397da0498..cebd4ca16916 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -52,6 +52,8 @@ targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all) OBJECTS := $(addprefix $(obj)/,$(obj-y)) OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) +clean-files += vmlinux.map + quiet_cmd_section_cmp = SECTCMP $* define cmd_section_cmp s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \ @@ -71,7 +73,7 @@ $(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.b $(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE $(call if_changed,section_cmp) -LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE $(call if_changed,ld) -- cgit v1.2.3 From 8c42dd78df148c90e48efff204cce38743906a79 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 27 Feb 2023 20:03:00 +0100 Subject: s390/extmem: return correct segment type in __segment_load() Commit f05f62d04271f ("s390/vmem: get rid of memory segment list") reshuffled the call to vmem_add_mapping() in __segment_load(), which now overwrites rc after it was set to contain the segment type code. As result, __segment_load() will now always return 0 on success, which corresponds to the segment type code SEG_TYPE_SW, i.e. a writeable segment. This results in a kernel crash when loading a read-only segment as dcssblk block device, and trying to write to it. Instead of reshuffling code again, make sure to return the segment type on success, and also describe this rather delicate and unexpected logic in the function comment. Also initialize new segtype variable with invalid value, to prevent possible future confusion. Fixes: f05f62d04271 ("s390/vmem: get rid of memory segment list") Cc: # 5.9+ Signed-off-by: Gerald Schaefer Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/mm/extmem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 5060956b8e7d..1bc42ce26599 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -289,15 +289,17 @@ segment_overlaps_others (struct dcss_segment *seg) /* * real segment loading function, called from segment_load + * Must return either an error code < 0, or the segment type code >= 0 */ static int __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end) { unsigned long start_addr, end_addr, dummy; struct dcss_segment *seg; - int rc, diag_cc; + int rc, diag_cc, segtype; start_addr = end_addr = 0; + segtype = -1; seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; @@ -326,9 +328,9 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long seg->res_name[8] = '\0'; strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; - rc = seg->vm_segtype; - if (rc == SEG_TYPE_SC || - ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) + segtype = seg->vm_segtype; + if (segtype == SEG_TYPE_SC || + ((segtype == SEG_TYPE_SR || segtype == SEG_TYPE_ER) && !do_nonshared)) seg->res->flags |= IORESOURCE_READONLY; /* Check for overlapping resources before adding the mapping. */ @@ -386,7 +388,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long out_free: kfree(seg); out: - return rc; + return rc < 0 ? rc : segtype; } /* -- cgit v1.2.3 From 6ca6b58107a8891e4b08087843188fdc5737ec08 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Mar 2023 09:41:06 +0100 Subject: s390/Kconfig: sort config S390 select list again Keep the config S390 select list sorted. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 933771b0b07a..e2c2f1516c26 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -125,8 +125,8 @@ config S390 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT - select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DMA_OPS if PCI @@ -187,7 +187,6 @@ config S390 select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES - select HAVE_RETHOOK select HAVE_KVM select HAVE_LIVEPATCH select HAVE_MEMBLOCK_PHYS_MAP @@ -200,6 +199,7 @@ config S390 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE + select HAVE_RETHOOK select HAVE_RSEQ select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI @@ -210,9 +210,9 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER select MMU_GATHER_RCU_TABLE_FREE - select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_PER_CPU_EMBED_FIRST_CHUNK -- cgit v1.2.3 From 42e19e6f04984088b6f9f0507c4c89a8152d9730 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 1 Mar 2023 02:23:08 +0100 Subject: s390/kprobes: fix irq mask clobbering on kprobe reenter from post_handler Recent test_kprobe_missed kprobes kunit test uncovers the following error (reported when CONFIG_DEBUG_ATOMIC_SLEEP is enabled): BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 662, name: kunit_try_catch preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 no locks held by kunit_try_catch/662. irq event stamp: 280 hardirqs last enabled at (279): [<00000003e60a3d42>] __do_pgm_check+0x17a/0x1c0 hardirqs last disabled at (280): [<00000003e3bd774a>] kprobe_exceptions_notify+0x27a/0x318 softirqs last enabled at (0): [<00000003e3c5c890>] copy_process+0x14a8/0x4c80 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 46 PID: 662 Comm: kunit_try_catch Tainted: G N 6.2.0-173644-g44c18d77f0c0 #2 Hardware name: IBM 3931 A01 704 (LPAR) Call Trace: [<00000003e60a3a00>] dump_stack_lvl+0x120/0x198 [<00000003e3d02e82>] __might_resched+0x60a/0x668 [<00000003e60b9908>] __mutex_lock+0xc0/0x14e0 [<00000003e60bad5a>] mutex_lock_nested+0x32/0x40 [<00000003e3f7b460>] unregister_kprobe+0x30/0xd8 [<00000003e51b2602>] test_kprobe_missed+0xf2/0x268 [<00000003e51b5406>] kunit_try_run_case+0x10e/0x290 [<00000003e51b7dfa>] kunit_generic_run_threadfn_adapter+0x62/0xb8 [<00000003e3ce30f8>] kthread+0x2d0/0x398 [<00000003e3b96afa>] __ret_from_fork+0x8a/0xe8 [<00000003e60ccada>] ret_from_fork+0xa/0x40 The reason for this error report is that kprobes handling code failed to restore irqs. The problem is that when kprobe is triggered from another kprobe post_handler current sequence of enable_singlestep / disable_singlestep is the following: enable_singlestep <- original kprobe (saves kprobe_saved_imask) enable_singlestep <- kprobe triggered from post_handler (clobbers kprobe_saved_imask) disable_singlestep <- kprobe triggered from post_handler (restores kprobe_saved_imask) disable_singlestep <- original kprobe (restores wrong clobbered kprobe_saved_imask) There is just one kprobe_ctlblk per cpu and both calls saves and loads irq mask to kprobe_saved_imask. To fix the problem simply move resume_execution (which calls disable_singlestep) before calling post_handler. This also fixes the problem that post_handler is called with pt_regs which were not yet adjusted after single-stepping. Cc: stable@vger.kernel.org Fixes: 4ba069b802c2 ("[S390] add kprobes support.") Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 5e713f318de3..698fce57a2c8 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -402,12 +402,11 @@ static int post_kprobe_handler(struct pt_regs *regs) if (!p) return 0; + resume_execution(p, regs); if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } - - resume_execution(p, regs); pop_kprobe(kcb); preempt_enable_no_resched(); -- cgit v1.2.3 From cd57953936f2213dfaccce10d20f396956222c7d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 1 Mar 2023 17:58:06 +0100 Subject: s390/kprobes: fix current_kprobe never cleared after kprobes reenter Recent test_kprobe_missed kprobes kunit test uncovers the following problem. Once kprobe is triggered from another kprobe (kprobe reenter), all future kprobes on this cpu are considered as kprobe reenter, thus pre_handler and post_handler are not being called and kprobes are counted as "missed". Commit b9599798f953 ("[S390] kprobes: activation and deactivation") introduced a simpler scheme for kprobes (de)activation and status tracking by using push_kprobe/pop_kprobe, which supposed to work for both initial kprobe entry as well as kprobe reentry and helps to avoid handling those two cases differently. The problem is that a sequence of calls in case of kprobes reenter: push_kprobe() <- NULL (current_kprobe) push_kprobe() <- kprobe1 (current_kprobe) pop_kprobe() -> kprobe1 (current_kprobe) pop_kprobe() -> kprobe1 (current_kprobe) leaves "kprobe1" as "current_kprobe" on this cpu, instead of setting it to NULL. In fact push_kprobe/pop_kprobe can only store a single state (there is just one prev_kprobe in kprobe_ctlblk). Which is a hack but sufficient, there is no need to have another prev_kprobe just to store NULL. To make a simple and backportable fix simply reset "prev_kprobe" when kprobe is poped from this "stack". No need to worry about "kprobe_status" in this case, because its value is only checked when current_kprobe != NULL. Cc: stable@vger.kernel.org Fixes: b9599798f953 ("[S390] kprobes: activation and deactivation") Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/kprobes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 698fce57a2c8..7b41ceecbb25 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -278,6 +278,7 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->prev_kprobe.kp = NULL; } NOKPROBE_SYMBOL(pop_kprobe); -- cgit v1.2.3