diff options
Diffstat (limited to 'arch/s390/kernel')
31 files changed, 1070 insertions, 371 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4a44ba5a2d73..80f500ffb55c 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o +obj-y += smp.o text_amode31.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 77ff2130cb04..b57da9338588 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -116,6 +116,7 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags); OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); @@ -152,5 +153,12 @@ int main(void) DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); /* sizeof kernel parameter area */ DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea)); + /* kernel parameter area offsets */ + DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device)); + DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start)); + DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size)); + DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base)); + DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); + DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); return 0; } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 0e36dfc9ccd6..d72a6df058d7 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -140,7 +140,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) while (count) { from = __pa(src); - if (!OLDMEM_BASE && from < sclp.hsa_size) { + if (!oldmem_data.start && from < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_kernel(dst, from, len); @@ -148,12 +148,12 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { - from -= OLDMEM_BASE; - len = min(count, OLDMEM_SIZE - from); - } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { - len = min(count, OLDMEM_SIZE - from); - from += OLDMEM_BASE; + if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { + from -= oldmem_data.start; + len = min(count, oldmem_data.size - from); + } else if (oldmem_data.start && from < oldmem_data.size) { + len = min(count, oldmem_data.size - from); + from += oldmem_data.start; } else { len = count; } @@ -183,7 +183,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) while (count) { from = __pa(src); - if (!OLDMEM_BASE && from < sclp.hsa_size) { + if (!oldmem_data.start && from < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_user(dst, from, len); @@ -191,12 +191,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { - from -= OLDMEM_BASE; - len = min(count, OLDMEM_SIZE - from); - } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { - len = min(count, OLDMEM_SIZE - from); - from += OLDMEM_BASE; + if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) { + from -= oldmem_data.size; + len = min(count, oldmem_data.size - from); + } else if (oldmem_data.start && from < oldmem_data.size) { + len = min(count, oldmem_data.size - from); + from += oldmem_data.start; } else { len = count; } @@ -243,10 +243,10 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, unsigned long size_old; int rc; - if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { - size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + if (pfn < oldmem_data.size >> PAGE_SHIFT) { + size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT)); rc = remap_pfn_range(vma, from, - pfn + (OLDMEM_BASE >> PAGE_SHIFT), + pfn + (oldmem_data.start >> PAGE_SHIFT), size_old, prot); if (rc || size == size_old) return rc; @@ -288,7 +288,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { - if (OLDMEM_BASE) + if (oldmem_data.start) return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); else return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, @@ -633,17 +633,17 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) u64 hdr_off; /* If we are not in kdump or zfcp/nvme dump mode return */ - if (!OLDMEM_BASE && !is_ipl_type_dump()) + if (!oldmem_data.start && !is_ipl_type_dump()) return 0; /* If we cannot get HSA size for zfcp/nvme dump return error */ if (is_ipl_type_dump() && !sclp.hsa_size) return -ENODEV; /* For kdump, exclude previous crashkernel memory */ - if (OLDMEM_BASE) { - oldmem_region.base = OLDMEM_BASE; - oldmem_region.size = OLDMEM_SIZE; - oldmem_type.total_size = OLDMEM_SIZE; + if (oldmem_data.start) { + oldmem_region.base = oldmem_data.start; + oldmem_region.size = oldmem_data.size; + oldmem_type.total_size = oldmem_data.size; } mem_chunk_cnt = get_mem_chunk_cnt(); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 09b6c6402f9b..4331c7e6e1c0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/minmax.h> #include <linux/debugfs.h> #include <asm/debug.h> @@ -92,6 +93,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, debug_sprintf_entry_t *curr_event); +static void debug_areas_swap(debug_info_t *a, debug_info_t *b); +static void debug_events_append(debug_info_t *dest, debug_info_t *src); /* globals */ @@ -311,24 +314,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, goto out; rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - refcount_set(&rc->ref_count, 1); out: return rc; @@ -388,27 +373,10 @@ static void debug_info_get(debug_info_t *db_info) */ static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (refcount_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if (db_info == debug_area_first) - debug_area_first = db_info->next; - if (db_info == debug_area_last) - debug_area_last = db_info->prev; - if (db_info->prev) - db_info->prev->next = db_info->next; - if (db_info->next) - db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } } /* @@ -632,6 +600,31 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + /** * debug_register_mode() - creates and initializes debug area. * @@ -661,19 +654,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex); /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if (!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc) + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); @@ -703,6 +693,82 @@ debug_info_t *debug_register(const char *name, int pages_per_area, EXPORT_SYMBOL(debug_register); /** + * debug_register_static() - registers a static debug area + * + * @id: Handle for static debug area + * @pages_per_area: Number of pages per area + * @nr_areas: Number of debug areas + * + * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO. + * + * Note: This function is called automatically via an initcall generated by + * DEFINE_STATIC_DEBUG_INFO. + */ +void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas) +{ + unsigned long flags; + debug_info_t *copy; + + if (!initialized) { + pr_err("Tried to register debug feature %s too early\n", + id->name); + return; + } + + copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!copy) { + pr_err("Registering debug feature %s failed\n", id->name); + + /* Clear pointers to prevent tracing into released initdata. */ + spin_lock_irqsave(&id->lock, flags); + id->areas = NULL; + id->active_pages = NULL; + id->active_entries = NULL; + spin_unlock_irqrestore(&id->lock, flags); + + return; + } + + /* Replace static trace area with dynamic copy. */ + spin_lock_irqsave(&id->lock, flags); + debug_events_append(copy, id); + debug_areas_swap(id, copy); + spin_unlock_irqrestore(&id->lock, flags); + + /* Clear pointers to initdata and discard copy. */ + copy->areas = NULL; + copy->active_pages = NULL; + copy->active_entries = NULL; + debug_info_free(copy); + + mutex_lock(&debug_mutex); + _debug_register(id); + mutex_unlock(&debug_mutex); +} + +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + +/** * debug_unregister() - give back debug area. * * @id: handle for debug log @@ -715,8 +781,10 @@ void debug_unregister(debug_info_t *id) if (!id) return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); + + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister); @@ -726,35 +794,28 @@ EXPORT_SYMBOL(debug_unregister); */ static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) { - debug_entry_t ***new_areas; + debug_info_t *new_id; unsigned long flags; - int rc = 0; if (!id || (nr_areas <= 0) || (pages_per_area < 0)) return -EINVAL; - if (pages_per_area > 0) { - new_areas = debug_areas_alloc(pages_per_area, nr_areas); - if (!new_areas) { - pr_info("Allocating memory for %i pages failed\n", - pages_per_area); - rc = -ENOMEM; - goto out; - } - } else { - new_areas = NULL; + + new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!new_id) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + return -ENOMEM; } + spin_lock_irqsave(&id->lock, flags); - debug_areas_free(id); - id->areas = new_areas; - id->nr_areas = nr_areas; - id->pages_per_area = pages_per_area; - id->active_area = 0; - memset(id->active_entries, 0, sizeof(int)*id->nr_areas); - memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + debug_events_append(new_id, id); + debug_areas_swap(new_id, id); + debug_info_free(new_id); spin_unlock_irqrestore(&id->lock, flags); pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); -out: - return rc; + + return 0; } /** @@ -772,16 +833,17 @@ void debug_set_level(debug_info_t *id, int new_level) if (!id) return; - spin_lock_irqsave(&id->lock, flags); + if (new_level == DEBUG_OFF_LEVEL) { - id->level = DEBUG_OFF_LEVEL; pr_info("%s: switched off\n", id->name); } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { pr_info("%s: level %i is out of range (%i - %i)\n", id->name, new_level, 0, DEBUG_MAX_LEVEL); - } else { - id->level = new_level; + return; } + + spin_lock_irqsave(&id->lock, flags); + id->level = new_level; spin_unlock_irqrestore(&id->lock, flags); } EXPORT_SYMBOL(debug_set_level); @@ -821,6 +883,42 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) id->active_entries[id->active_area]); } +/* Swap debug areas of a and b. */ +static void debug_areas_swap(debug_info_t *a, debug_info_t *b) +{ + swap(a->nr_areas, b->nr_areas); + swap(a->pages_per_area, b->pages_per_area); + swap(a->areas, b->areas); + swap(a->active_area, b->active_area); + swap(a->active_pages, b->active_pages); + swap(a->active_entries, b->active_entries); +} + +/* Append all debug events in active area from source to destination log. */ +static void debug_events_append(debug_info_t *dest, debug_info_t *src) +{ + debug_entry_t *from, *to, *last; + + if (!src->areas || !dest->areas) + return; + + /* Loop over all entries in src, starting with oldest. */ + from = get_active_entry(src); + last = from; + do { + if (from->clock != 0LL) { + to = get_active_entry(dest); + memset(to, 0, dest->entry_size); + memcpy(to, from, min(src->entry_size, + dest->entry_size)); + proceed_active_entry(dest); + } + + proceed_active_entry(src); + from = get_active_entry(src); + } while (from != last); +} + /* * debug_finish_entry: * - set timestamp, caller address, cpu number etc. @@ -1111,16 +1209,17 @@ int debug_register_view(debug_info_t *id, struct debug_view *view) break; } if (i == DEBUG_MAX_VIEWS) { - pr_err("Registering view %s/%s would exceed the maximum " - "number of views %i\n", id->name, view->name, i); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); - if (rc) + if (rc) { + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); debugfs_remove(pde); + } out: return rc; } diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index a3f47464c3f1..76a656b2146f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -14,6 +14,7 @@ #include <asm/diag.h> #include <asm/trace/diag.h> #include <asm/sections.h> +#include "entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -50,8 +51,16 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; -struct diag_ops __bootdata_preserved(diag_dma_ops); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma); +struct diag_ops __amode31_ref diag_amode31_ops = { + .diag210 = _diag210_amode31, + .diag26c = _diag26c_amode31, + .diag14 = _diag14_amode31, + .diag0c = _diag0c_amode31, + .diag308_reset = _diag308_reset_amode31 +}; + +static struct diag210 _diag210_tmp_amode31 __section(".amode31.data"); +struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31; static int show_diag_stat(struct seq_file *m, void *v) { @@ -59,7 +68,7 @@ static int show_diag_stat(struct seq_file *m, void *v) unsigned long n = (unsigned long) v - 1; int cpu, prec, tmp; - get_online_cpus(); + cpus_read_lock(); if (n == 0) { seq_puts(m, " "); @@ -78,7 +87,7 @@ static int show_diag_stat(struct seq_file *m, void *v) } seq_printf(m, " %s\n", diag_map[n-1].name); } - put_online_cpus(); + cpus_read_unlock(); return 0; } @@ -135,7 +144,7 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion); int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) { diag_stat_inc(DIAG_STAT_X014); - return diag_dma_ops.diag14(rx, ry1, subcode); + return diag_amode31_ops.diag14(rx, ry1, subcode); } EXPORT_SYMBOL(diag14); @@ -172,12 +181,12 @@ int diag210(struct diag210 *addr) int ccode; spin_lock_irqsave(&diag210_lock, flags); - *__diag210_tmp_dma = *addr; + *__diag210_tmp_amode31 = *addr; diag_stat_inc(DIAG_STAT_X210); - ccode = diag_dma_ops.diag210(__diag210_tmp_dma); + ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31); - *addr = *__diag210_tmp_dma; + *addr = *__diag210_tmp_amode31; spin_unlock_irqrestore(&diag210_lock, flags); return ccode; @@ -205,6 +214,6 @@ EXPORT_SYMBOL(diag224); int diag26c(void *req, void *resp, enum diag26c_sc subcode) { diag_stat_inc(DIAG_STAT_X26C); - return diag_dma_ops.diag26c(req, resp, subcode); + return diag_amode31_ops.diag26c(req, resp, subcode); } EXPORT_SYMBOL(diag26c); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 5412efe328f8..ec5515423f17 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -312,10 +312,12 @@ static const unsigned char formats[][6] = { [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 }, + [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 }, [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 }, [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 }, [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 }, [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 }, + [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 }, [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 }, [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 }, [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 }, diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index fb84e3fc1686..9857cb046726 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -236,6 +236,10 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; + /* the control bit is set during PCI initialization */ + } } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5a2f70cbd3a9..b9716a7e326d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -624,12 +624,15 @@ ENTRY(mcck_int_handler) 4: j 4b ENDPROC(mcck_int_handler) -# -# PSW restart interrupt handler -# ENTRY(restart_int_handler) ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 + jz 0f + la %r15,4095 + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15) +0: larl %r15,.Lstosm_tmp + stosm 0(%r15),0x04 # turn dat on, keep irqs off lg %r15,__LC_RESTART_STACK xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -638,7 +641,7 @@ ENTRY(restart_int_handler) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) lg %r1,__LC_RESTART_FN # load fn, parm & source cpu lg %r2,__LC_RESTART_DATA - lg %r3,__LC_RESTART_SOURCE + lgf %r3,__LC_RESTART_SOURCE ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1ab33465382f..7f2696e8d511 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -28,10 +28,8 @@ void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); void kernel_stack_overflow(struct pt_regs * regs); -void do_signal(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); void do_io_irq(struct pt_regs *regs); @@ -64,4 +62,13 @@ void stack_free(unsigned long stack); extern char kprobes_insn_page[]; +extern char _samode31[], _eamode31[]; +extern char _stext_amode31[], _etext_amode31[]; +extern struct exception_table_entry _start_amode31_ex_table[]; +extern struct exception_table_entry _stop_amode31_ex_table[]; + +#define __amode31_data __section(".amode31.data") +#define __amode31_ref __section(".amode31.refs") +extern long _start_amode31_refs[], _end_amode31_refs[]; + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 2d8f595d9196..0a464d328467 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -18,8 +18,11 @@ #include <trace/syscall.h> #include <asm/asm-offsets.h> #include <asm/cacheflush.h> +#include <asm/ftrace.lds.h> +#include <asm/nospec-branch.h> #include <asm/set_memory.h> #include "entry.h" +#include "ftrace.h" /* * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) @@ -41,7 +44,130 @@ */ void *ftrace_func __read_mostly = ftrace_stub; -unsigned long ftrace_plt; +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; + +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_br:\n" + " lmg %r0,%r1,2(%r1)\n" + " br %r1\n" + "ftrace_shared_hotpatch_trampoline_br_end:\n" +); + +#ifdef CONFIG_EXPOLINE +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_ex:\n" + " lmg %r0,%r1,2(%r1)\n" + " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n" + " j .\n" + "ftrace_shared_hotpatch_trampoline_ex_end:\n" +); + +asm( + " .align 16\n" + "ftrace_shared_hotpatch_trampoline_exrl:\n" + " lmg %r0,%r1,2(%r1)\n" + " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */ + " j .\n" + "0: br %r1\n" + "ftrace_shared_hotpatch_trampoline_exrl_end:\n" +); +#endif /* CONFIG_EXPOLINE */ + +#ifdef CONFIG_MODULES +static char *ftrace_plt; + +asm( + " .data\n" + "ftrace_plt_template:\n" + " basr %r1,%r0\n" + " lg %r1,0f-.(%r1)\n" + " br %r1\n" + "0: .quad ftrace_caller\n" + "ftrace_plt_template_end:\n" + " .previous\n" +); +#endif /* CONFIG_MODULES */ + +static const char *ftrace_shared_hotpatch_trampoline(const char **end) +{ + const char *tstart, *tend; + + tstart = ftrace_shared_hotpatch_trampoline_br; + tend = ftrace_shared_hotpatch_trampoline_br_end; +#ifdef CONFIG_EXPOLINE + if (!nospec_disable) { + tstart = ftrace_shared_hotpatch_trampoline_ex; + tend = ftrace_shared_hotpatch_trampoline_ex_end; + if (test_facility(35)) { /* exrl */ + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; + } + } +#endif /* CONFIG_EXPOLINE */ + if (end) + *end = tend; + return tstart; +} + +bool ftrace_need_init_nop(void) +{ + return ftrace_shared_hotpatch_trampoline(NULL); +} + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline = + __ftrace_hotpatch_trampolines_start; + static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; + static struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_hotpatch_trampoline **next_trampoline; + struct ftrace_hotpatch_trampoline *trampolines_end; + struct ftrace_hotpatch_trampoline tmp; + struct ftrace_insn *insn; + const char *shared; + s32 disp; + + BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) != + SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE); + + next_trampoline = &next_vmlinux_trampoline; + trampolines_end = __ftrace_hotpatch_trampolines_end; + shared = ftrace_shared_hotpatch_trampoline(NULL); +#ifdef CONFIG_MODULES + if (mod) { + next_trampoline = &mod->arch.next_trampoline; + trampolines_end = mod->arch.trampolines_end; + shared = ftrace_plt; + } +#endif + + if (WARN_ON_ONCE(*next_trampoline >= trampolines_end)) + return -ENOMEM; + trampoline = (*next_trampoline)++; + + /* Check for the compiler-generated fentry nop (brcl 0, .). */ + if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig)))) + return -EINVAL; + + /* Generate the trampoline. */ + tmp.brasl_opc = 0xc015; /* brasl %r1, shared */ + tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2; + tmp.interceptor = FTRACE_ADDR; + tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn); + s390_kernel_write(trampoline, &tmp, sizeof(tmp)); + + /* Generate a jump to the trampoline. */ + disp = ((char *)trampoline - (char *)rec->ip) / 2; + insn = (struct ftrace_insn *)rec->ip; + s390_kernel_write(&insn->disp, &disp, sizeof(disp)); + + return 0; +} int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) @@ -49,11 +175,45 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } +static void ftrace_generate_nop_insn(struct ftrace_insn *insn) +{ + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +} + +static void ftrace_generate_call_insn(struct ftrace_insn *insn, + unsigned long ip) +{ + unsigned long target; + + /* brasl r0,ftrace_caller */ + target = FTRACE_ADDR; +#ifdef CONFIG_MODULES + if (is_module_addr((void *)ip)) + target = (unsigned long)ftrace_plt; +#endif /* CONFIG_MODULES */ + insn->opc = 0xc005; + insn->disp = (target - ip) / 2; +} + +static void brcl_disable(void *brcl) +{ + u8 op = 0x04; /* set mask field to zero */ + + s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); +} + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; + if (ftrace_shared_hotpatch_trampoline(NULL)) { + brcl_disable((void *)rec->ip); + return 0; + } + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace ftrace call with a nop. */ @@ -67,10 +227,22 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return 0; } +static void brcl_enable(void *brcl) +{ + u8 op = 0xf4; /* set mask field to all ones */ + + s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); +} + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; + if (ftrace_shared_hotpatch_trampoline(NULL)) { + brcl_enable((void *)rec->ip); + return 0; + } + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ @@ -95,22 +267,44 @@ int __init ftrace_dyn_arch_init(void) return 0; } +void arch_ftrace_update_code(int command) +{ + if (ftrace_shared_hotpatch_trampoline(NULL)) + ftrace_modify_all_code(command); + else + ftrace_run_stop_machine(command); +} + +static void __ftrace_sync(void *dummy) +{ +} + +int ftrace_arch_code_modify_post_process(void) +{ + if (ftrace_shared_hotpatch_trampoline(NULL)) { + /* Send SIGP to the other CPUs, so they see the new code. */ + smp_call_function(__ftrace_sync, NULL, 1); + } + return 0; +} + #ifdef CONFIG_MODULES static int __init ftrace_plt_init(void) { - unsigned int *ip; + const char *start, *end; - ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE); + ftrace_plt = module_alloc(PAGE_SIZE); if (!ftrace_plt) panic("cannot allocate ftrace plt\n"); - ip = (unsigned int *) ftrace_plt; - ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ - ip[1] = 0x100a0004; - ip[2] = 0x07f10000; - ip[3] = FTRACE_ADDR >> 32; - ip[4] = FTRACE_ADDR & 0xffffffff; - set_memory_ro(ftrace_plt, 1); + + start = ftrace_shared_hotpatch_trampoline(&end); + if (!start) { + start = ftrace_plt_template; + end = ftrace_plt_template_end; + } + memcpy(ftrace_plt, start, end - start); + set_memory_ro((unsigned long)ftrace_plt, 1); return 0; } device_initcall(ftrace_plt_init); @@ -147,17 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - u8 op = 0x04; /* set mask field to zero */ - - s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); + brcl_disable(__va(ftrace_graph_caller)); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - u8 op = 0xf4; /* set mask field to all ones */ - - s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); + brcl_enable(__va(ftrace_graph_caller)); return 0; } diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h new file mode 100644 index 000000000000..69e416f4c6b0 --- /dev/null +++ b/arch/s390/kernel/ftrace.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FTRACE_H +#define _FTRACE_H + +#include <asm/types.h> + +struct ftrace_hotpatch_trampoline { + u16 brasl_opc; + s32 brasl_disp; + s16: 16; + u64 rest_of_intercepted_function; + u64 interceptor; +} __packed; + +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; +extern const char ftrace_shared_hotpatch_trampoline_br[]; +extern const char ftrace_shared_hotpatch_trampoline_br_end[]; +extern const char ftrace_shared_hotpatch_trampoline_ex[]; +extern const char ftrace_shared_hotpatch_trampoline_ex_end[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; +extern const char ftrace_plt_template[]; +extern const char ftrace_plt_template_end[]; + +#endif /* _FTRACE_H */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 0c253886da78..114b5490ad8e 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -21,6 +21,7 @@ ENTRY(startup_continue) larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base + lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers # # Setup stack # @@ -41,3 +42,19 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad 0 # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad 0 # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0x0000000000008000 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad 0 # cr15: linkage stack operations diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 50e2c21e0ec9..e2cc35775b99 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -179,8 +179,6 @@ static inline int __diag308(unsigned long subcode, void *addr) int diag308(unsigned long subcode, void *addr) { - if (IS_ENABLED(CONFIG_KASAN)) - __arch_local_irq_stosm(0x04); /* enable DAT */ diag_stat_inc(DIAG_STAT_X308); return __diag308(subcode, addr); } @@ -1843,7 +1841,6 @@ static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart); static void __do_restart(void *ignore) { - __arch_local_irq_stosm(0x04); /* enable DAT */ smp_send_stop(); #ifdef CONFIG_CRASH_DUMP crash_kexec(NULL); @@ -2082,7 +2079,7 @@ void s390_reset_system(void) /* Disable lowcore protection */ __ctl_clear_bit(0, 28); - diag_dma_ops.diag308_reset(); + diag_amode31_ops.diag308_reset(); } #ifdef CONFIG_KEXEC_FILE diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c index af43535a976d..b5245fadcfb0 100644 --- a/arch/s390/kernel/ipl_vmparm.c +++ b/arch/s390/kernel/ipl_vmparm.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/minmax.h> +#include <linux/string.h> #include <asm/ebcdic.h> #include <asm/ipl.h> diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 234d085257eb..3a3145c4a3ba 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -228,7 +228,7 @@ int show_interrupts(struct seq_file *p, void *v) int index = *(loff_t *) v; int cpu, irq; - get_online_cpus(); + cpus_read_lock(); if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) @@ -258,7 +258,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } out: - put_online_cpus(); + cpus_read_unlock(); return 0; } diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index ab584e8e3527..9156653b56f6 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,7 +36,7 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected, unsigned char *ipe = (unsigned char *)expected; unsigned char *ipn = (unsigned char *)new; - pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); pr_emerg("Found: %6ph\n", ipc); pr_emerg("Expected: %6ph\n", ipe); pr_emerg("New: %6ph\n", ipn); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 1005a6935fbe..0505e55a6297 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -224,8 +224,8 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - vmcoreinfo_append_str("SDMA=%lx\n", __sdma); - vmcoreinfo_append_str("EDMA=%lx\n", __edma); + vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } @@ -263,7 +263,6 @@ static void __do_machine_kexec(void *data) */ static void __machine_kexec(void *data) { - __arch_local_irq_stosm(0x04); /* enable DAT */ pfault_fini(); tracing_off(); debug_locks_off(); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 4055f1c49814..b01ba460b7ca 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -14,6 +14,7 @@ #include <linux/elf.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <linux/ftrace.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/kasan.h> @@ -23,6 +24,8 @@ #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> +#include <asm/ftrace.lds.h> +#include <asm/set_memory.h> #if 0 #define DEBUGP printk @@ -48,6 +51,13 @@ void *module_alloc(unsigned long size) return p; } +#ifdef CONFIG_FUNCTION_TRACER +void module_arch_cleanup(struct module *mod) +{ + module_memfree(mod->arch.trampolines_start); +} +#endif + void module_arch_freeing_init(struct module *mod) { if (is_livepatch_module(mod) && @@ -466,6 +476,30 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, write); } +#ifdef CONFIG_FUNCTION_TRACER +static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, + const Elf_Shdr *s) +{ + char *start, *end; + int numpages; + size_t size; + + size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); + numpages = DIV_ROUND_UP(size, PAGE_SIZE); + start = module_alloc(numpages * PAGE_SIZE); + if (!start) + return -ENOMEM; + set_memory_ro((unsigned long)start, numpages); + end = start + size; + + me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start; + me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end; + me->arch.next_trampoline = me->arch.trampolines_start; + + return 0; +} +#endif /* CONFIG_FUNCTION_TRACER */ + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) @@ -473,6 +507,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings, *secname; void *aseg; +#ifdef CONFIG_FUNCTION_TRACER + int ret; +#endif if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable && me->arch.plt_size) { @@ -507,6 +544,14 @@ int module_finalize(const Elf_Ehdr *hdr, if (IS_ENABLED(CONFIG_EXPOLINE) && (str_has_prefix(secname, ".s390_return"))) nospec_revert(aseg, aseg + s->sh_size); + +#ifdef CONFIG_FUNCTION_TRACER + if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { + ret = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (ret < 0) + return ret; + } +#endif /* CONFIG_FUNCTION_TRACER */ } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 5a7420b23aa8..4bef35b79b93 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -121,7 +121,7 @@ static void os_info_old_init(void) if (os_info_init) return; - if (!OLDMEM_BASE) + if (!oldmem_data.start) goto fail; if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) goto fail; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index d7dc36ec0a60..2e3bb633acf6 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1138,7 +1138,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&cfset_ctrset_mutex); switch (cmd) { case S390_HWCTR_START: @@ -1155,7 +1155,7 @@ static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } mutex_unlock(&cfset_ctrset_mutex); - put_online_cpus(); + cpus_read_unlock(); return ret; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 82df39b17bb5..d9d4a806979e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -11,6 +11,7 @@ #include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/random.h> #include <linux/sched/mm.h> #include <linux/init.h> #include <linux/seq_file.h> @@ -23,8 +24,12 @@ #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> +#include <asm/sclp.h> #include <asm/smp.h> +unsigned long __read_mostly elf_hwcap; +char elf_platform[ELF_PLATFORM_SIZE]; + struct cpu_info { unsigned int cpu_mhz_dynamic; unsigned int cpu_mhz_static; @@ -113,15 +118,33 @@ static void show_facilities(struct seq_file *m) static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs", - "vxe2", "vxp", "sort", "dflt" - }; - static const char * const int_hwcap_str[] = { - "sie" + [HWCAP_NR_ESAN3] = "esan3", + [HWCAP_NR_ZARCH] = "zarch", + [HWCAP_NR_STFLE] = "stfle", + [HWCAP_NR_MSA] = "msa", + [HWCAP_NR_LDISP] = "ldisp", + [HWCAP_NR_EIMM] = "eimm", + [HWCAP_NR_DFP] = "dfp", + [HWCAP_NR_HPAGE] = "edat", + [HWCAP_NR_ETF3EH] = "etf3eh", + [HWCAP_NR_HIGH_GPRS] = "highgprs", + [HWCAP_NR_TE] = "te", + [HWCAP_NR_VXRS] = "vx", + [HWCAP_NR_VXRS_BCD] = "vxd", + [HWCAP_NR_VXRS_EXT] = "vxe", + [HWCAP_NR_GS] = "gs", + [HWCAP_NR_VXRS_EXT2] = "vxe2", + [HWCAP_NR_VXRS_PDE] = "vxp", + [HWCAP_NR_SORT] = "sort", + [HWCAP_NR_DFLT] = "dflt", + [HWCAP_NR_VXRS_PDE2] = "vxp2", + [HWCAP_NR_NNPA] = "nnpa", + [HWCAP_NR_PCI_MIO] = "pcimio", + [HWCAP_NR_SIE] = "sie", }; int i, cpu; + BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX); seq_printf(m, "vendor_id : IBM/S390\n" "# processors : %i\n" "bogomips per cpu: %lu.%02lu\n", @@ -132,9 +155,6 @@ static void show_cpu_summary(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_facilities(m); show_cacheinfo(m); @@ -149,6 +169,141 @@ static void show_cpu_summary(struct seq_file *m, void *v) } } +static int __init setup_hwcaps(void) +{ + /* instructions named N3, "backported" to esa-mode */ + if (test_facility(0)) + elf_hwcap |= HWCAP_ESAN3; + + /* z/Architecture mode active */ + elf_hwcap |= HWCAP_ZARCH; + + /* store-facility-list-extended */ + if (test_facility(7)) + elf_hwcap |= HWCAP_STFLE; + + /* message-security assist */ + if (test_facility(17)) + elf_hwcap |= HWCAP_MSA; + + /* long-displacement */ + if (test_facility(19)) + elf_hwcap |= HWCAP_LDISP; + + /* extended-immediate */ + if (test_facility(21)) + elf_hwcap |= HWCAP_EIMM; + + /* extended-translation facility 3 enhancement */ + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_ETF3EH; + + /* decimal floating point & perform floating point operation */ + if (test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_DFP; + + /* huge page support */ + if (MACHINE_HAS_EDAT1) + elf_hwcap |= HWCAP_HPAGE; + + /* 64-bit register support for 31-bit processes */ + elf_hwcap |= HWCAP_HIGH_GPRS; + + /* transactional execution */ + if (MACHINE_HAS_TE) + elf_hwcap |= HWCAP_TE; + + /* + * Vector extension can be disabled with the "novx" parameter. + * Use MACHINE_HAS_VX instead of facility bit 129. + */ + if (MACHINE_HAS_VX) { + elf_hwcap |= HWCAP_VXRS; + if (test_facility(134)) + elf_hwcap |= HWCAP_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_VXRS_EXT; + if (test_facility(148)) + elf_hwcap |= HWCAP_VXRS_EXT2; + if (test_facility(152)) + elf_hwcap |= HWCAP_VXRS_PDE; + if (test_facility(192)) + elf_hwcap |= HWCAP_VXRS_PDE2; + } + + if (test_facility(150)) + elf_hwcap |= HWCAP_SORT; + + if (test_facility(151)) + elf_hwcap |= HWCAP_DFLT; + + if (test_facility(165)) + elf_hwcap |= HWCAP_NNPA; + + /* guarded storage */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_GS; + + if (MACHINE_HAS_PCI_MIO) + elf_hwcap |= HWCAP_PCI_MIO; + + /* virtualization support */ + if (sclp.has_sief2) + elf_hwcap |= HWCAP_SIE; + + return 0; +} +arch_initcall(setup_hwcaps); + +static int __init setup_elf_platform(void) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + add_device_randomness(&cpu_id, sizeof(cpu_id)); + switch (cpu_id.machine) { + case 0x2064: + case 0x2066: + default: /* Use "z900" as default for 64 bit kernels. */ + strcpy(elf_platform, "z900"); + break; + case 0x2084: + case 0x2086: + strcpy(elf_platform, "z990"); + break; + case 0x2094: + case 0x2096: + strcpy(elf_platform, "z9-109"); + break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; + case 0x2964: + case 0x2965: + strcpy(elf_platform, "z13"); + break; + case 0x3906: + case 0x3907: + strcpy(elf_platform, "z14"); + break; + case 0x8561: + case 0x8562: + strcpy(elf_platform, "z15"); + break; + } + return 0; +} +arch_initcall(setup_elf_platform); + static void show_cpu_topology(struct seq_file *m, unsigned long n) { #ifdef CONFIG_SCHED_TOPOLOGY @@ -210,7 +365,7 @@ static inline void *c_update(loff_t *pos) static void *c_start(struct seq_file *m, loff_t *pos) { - get_online_cpus(); + cpus_read_lock(); return c_update(pos); } @@ -222,7 +377,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) static void c_stop(struct seq_file *m, void *v) { - put_online_cpus(); + cpus_read_unlock(); } const struct seq_operations cpuinfo_op = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..fe14beb338e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -89,27 +89,71 @@ EXPORT_SYMBOL(console_devno); unsigned int console_irq = -1; EXPORT_SYMBOL(console_irq); -unsigned long elf_hwcap __read_mostly = 0; -char elf_platform[ELF_PLATFORM_SIZE]; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .amode31 section. + */ +unsigned long __amode31_ref __samode31 = __pa(&_samode31); +unsigned long __amode31_ref __eamode31 = __pa(&_eamode31); +unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31); +unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31); +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; + +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the AMODE31 + * sections. + * Because the AMODE31 sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head64.S and then gets updated again after AMODE31 + * relocation. We must access the relevant AMODE31 tables indirectly via + * pointers placed in the .amode31.refs linker section. Those pointers get + * updated automatically during AMODE31 relocation and always contain a valid + * address within AMODE31 sections. + */ + +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64); + +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; -unsigned long int_hwcap = 0; +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31; +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct mem_detect_info __bootdata(mem_detect); +struct initrd_data __bootdata(initrd_data); -struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); -struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__stext_dma); -unsigned long __bootdata_preserved(__etext_dma); -unsigned long __bootdata_preserved(__sdma); -unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); u64 __bootdata_preserved(alt_stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -254,7 +298,7 @@ static void __init setup_zfcpdump(void) { if (!is_ipl_type_dump()) return; - if (OLDMEM_BASE) + if (oldmem_data.start) return; strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; @@ -421,7 +465,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_stack = (unsigned long) restart_stack; lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; - lc->restart_source = -1UL; + lc->restart_source = -1U; mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -450,12 +494,19 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { + struct lowcore *lc = lowcore_ptr[0]; + __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; + __ctl_store(S390_lowcore.cregs_save_area, 0, 15); __ctl_set_bit(0, 28); + mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS); + mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw); + memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area, + sizeof(S390_lowcore.cregs_save_area)); } static struct resource code_resource = { @@ -610,9 +661,9 @@ static void __init reserve_crashkernel(void) return; } - low = crash_base ?: OLDMEM_BASE; + low = crash_base ?: oldmem_data.start; high = low + crash_size; - if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) { /* The crashkernel fits into OLDMEM, reuse OLDMEM */ crash_base = low; } else { @@ -639,7 +690,7 @@ static void __init reserve_crashkernel(void) if (register_memory_notifier(&kdump_mem_nb)) return; - if (!OLDMEM_BASE && MACHINE_IS_VM) + if (!oldmem_data.start && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -658,11 +709,11 @@ static void __init reserve_crashkernel(void) static void __init reserve_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (!INITRD_START || !INITRD_SIZE) + if (!initrd_data.start || !initrd_data.size) return; - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - memblock_reserve(INITRD_START, INITRD_SIZE); + initrd_start = initrd_data.start; + initrd_end = initrd_start + initrd_data.size; + memblock_reserve(initrd_data.start, initrd_data.size); #endif } @@ -732,10 +783,10 @@ static void __init memblock_add_mem_detect_info(void) static void __init check_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE && - !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + if (initrd_data.start && initrd_data.size && + !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_free(INITRD_START, INITRD_SIZE); + memblock_free(initrd_data.start, initrd_data.size); initrd_start = initrd_end = 0; } #endif @@ -748,10 +799,10 @@ static void __init reserve_kernel(void) { unsigned long start_pfn = PFN_UP(__pa(_end)); - memblock_reserve(0, HEAD_END); + memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); - memblock_reserve(__sdma, __edma - __sdma); } static void __init setup_memory(void) @@ -771,152 +822,52 @@ static void __init setup_memory(void) memblock_enforce_memory_limit(memblock_end_of_DRAM()); } -/* - * Setup hardware capabilities. - */ -static int __init setup_hwcaps(void) +static void __init relocate_amode31_section(void) { - static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuid cpu_id; - int i; - - /* - * The store facility list bits numbers as found in the principles - * of operation are numbered with bit 1UL<<31 as number 0 to - * bit 1UL<<0 as number 31. - * Bit 0: instructions named N3, "backported" to esa-mode - * Bit 2: z/Architecture mode is active - * Bit 7: the store-facility-list-extended facility is installed - * Bit 17: the message-security assist is installed - * Bit 19: the long-displacement facility is installed - * Bit 21: the extended-immediate facility is installed - * Bit 22: extended-translation facility 3 is installed - * Bit 30: extended-translation facility 3 enhancement facility - * These get translated to: - * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, - * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and - * HWCAP_S390_ETF3EH bit 8 (22 && 30). - */ - for (i = 0; i < 6; i++) - if (test_facility(stfl_bits[i])) - elf_hwcap |= 1UL << i; - - if (test_facility(22) && test_facility(30)) - elf_hwcap |= HWCAP_S390_ETF3EH; - - /* - * Check for additional facilities with store-facility-list-extended. - * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 - * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information - * as stored by stfl, bits 32-xxx contain additional facilities. - * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilities - * are: - * Bit 42: decimal floating point facility is installed - * Bit 44: perform floating point operation facility is installed - * translated to: - * HWCAP_S390_DFP bit 6 (42 && 44). - */ - if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) - elf_hwcap |= HWCAP_S390_DFP; - - /* - * Huge page support HWCAP_S390_HPAGE is bit 7. - */ - if (MACHINE_HAS_EDAT1) - elf_hwcap |= HWCAP_S390_HPAGE; - - /* - * 64-bit register support for 31-bit processes - * HWCAP_S390_HIGH_GPRS is bit 9. - */ - elf_hwcap |= HWCAP_S390_HIGH_GPRS; - - /* - * Transactional execution support HWCAP_S390_TE is bit 10. - */ - if (MACHINE_HAS_TE) - elf_hwcap |= HWCAP_S390_TE; - - /* - * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension - * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX - * instead of facility bit 129. - */ - if (MACHINE_HAS_VX) { - elf_hwcap |= HWCAP_S390_VXRS; - if (test_facility(134)) - elf_hwcap |= HWCAP_S390_VXRS_BCD; - if (test_facility(135)) - elf_hwcap |= HWCAP_S390_VXRS_EXT; - if (test_facility(148)) - elf_hwcap |= HWCAP_S390_VXRS_EXT2; - if (test_facility(152)) - elf_hwcap |= HWCAP_S390_VXRS_PDE; - } - if (test_facility(150)) - elf_hwcap |= HWCAP_S390_SORT; - if (test_facility(151)) - elf_hwcap |= HWCAP_S390_DFLT; - - /* - * Guarded storage support HWCAP_S390_GS is bit 12. - */ - if (MACHINE_HAS_GS) - elf_hwcap |= HWCAP_S390_GS; - - get_cpu_id(&cpu_id); - add_device_randomness(&cpu_id, sizeof(cpu_id)); - switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: - strcpy(elf_platform, "z10"); - break; - case 0x2817: - case 0x2818: - strcpy(elf_platform, "z196"); - break; - case 0x2827: - case 0x2828: - strcpy(elf_platform, "zEC12"); - break; - case 0x2964: - case 0x2965: - strcpy(elf_platform, "z13"); - break; - case 0x3906: - case 0x3907: - strcpy(elf_platform, "z14"); - break; - case 0x8561: - case 0x8562: - strcpy(elf_platform, "z15"); - break; - } - - /* - * Virtualization support HWCAP_INT_SIE is bit 0. - */ - if (sclp.has_sief2) - int_hwcap |= HWCAP_INT_SIE; + unsigned long amode31_addr, amode31_size; + long amode31_offset; + long *ptr; + + /* Allocate a new AMODE31 capable memory region */ + amode31_size = __eamode31 - __samode31; + pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); + amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE); + if (!amode31_addr) + panic("Failed to allocate memory for AMODE31 section\n"); + amode31_offset = amode31_addr - __samode31; + + /* Move original AMODE31 section to the new one */ + memmove((void *)amode31_addr, (void *)__samode31, amode31_size); + /* Zero out the old AMODE31 section to catch invalid accesses within it */ + memset((void *)__samode31, 0, amode31_size); + + /* Update all AMODE31 region references */ + for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) + *ptr += amode31_offset; +} - return 0; +/* This must be called after AMODE31 relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + __ctl_store(cr2.val, 2, 2); + __ctl_store(cr5.val, 5, 5); + __ctl_store(cr15.val, 15, 15); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + __ctl_load(cr2.val, 2, 2); + __ctl_load(cr5.val, 5, 5); + __ctl_load(cr15.val, 15, 15); } -arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -1059,6 +1010,9 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); + relocate_amode31_section(); + setup_cr(); + setup_uv(); setup_memory_end(); setup_memory(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 78ef53b29958..307f5d99514d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -533,9 +533,3 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) */ restore_saved_sigmask(); } - -void do_notify_resume(struct pt_regs *regs) -{ - tracehook_notify_resume(regs); - rseq_handle_notify_resume(NULL, regs); -} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8984711f72ed..2a991e43ead3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -252,6 +252,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; @@ -294,10 +295,10 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; - lc->restart_stack = lc->nodat_stack; + lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; - lc->restart_source = -1UL; + lc->restart_source = -1U; pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); } @@ -311,12 +312,12 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void __no_sanitize_address pcpu_delegate(struct pcpu *pcpu, - pcpu_delegate_fn *func, - void *data, unsigned long stack) +static void pcpu_delegate(struct pcpu *pcpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) { struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; - unsigned long source_cpu = stap(); + unsigned int source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); if (pcpu->address == source_cpu) { @@ -569,6 +570,9 @@ static void smp_ctl_bit_callback(void *info) __ctl_load(cregs, 0, 15); } +static DEFINE_SPINLOCK(ctl_lock); +static unsigned long ctlreg; + /* * Set a bit in a control register of all cpus */ @@ -576,6 +580,11 @@ void smp_ctl_set_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; + spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __set_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -587,6 +596,11 @@ void smp_ctl_clear_bit(int cr, int bit) { struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; + spin_lock(&ctl_lock); + memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); + __clear_bit(bit, &ctlreg); + memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); @@ -673,7 +687,7 @@ void __init smp_save_dump_cpus(void) unsigned long page; bool is_boot_cpu; - if (!(OLDMEM_BASE || is_ipl_type_dump())) + if (!(oldmem_data.start || is_ipl_type_dump())) /* No previous system present, normal boot. */ return; /* Allocate a page as dumping area for the store status sigps */ @@ -704,12 +718,12 @@ void __init smp_save_dump_cpus(void) * these registers an SCLP request is required which is * done by drivers/s390/char/zcore.c:init_cpu_info() */ - if (!is_boot_cpu || OLDMEM_BASE) + if (!is_boot_cpu || oldmem_data.start) /* Get the CPU registers */ smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } memblock_free(page, PAGE_SIZE); - diag_dma_ops.diag308_reset(); + diag_amode31_ops.diag308_reset(); pcpu_set_smt(0); } #endif /* CONFIG_CRASH_DUMP */ @@ -793,7 +807,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) u16 core_id; int nr, i; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); @@ -816,7 +830,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) nr += smp_add_core(&info->core[i], &avail, configured, early); } mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return nr; } @@ -868,11 +882,19 @@ void __init smp_detect_cpus(void) memblock_free_early((unsigned long)info, sizeof(*info)); } -static void smp_init_secondary(void) +/* + * Activate a secondary processor. + */ +static void smp_start_secondary(void *cpuvoid) { int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long)restart_stack; + S390_lowcore.restart_fn = (unsigned long)do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1U; + S390_lowcore.restart_flags = 0; restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); @@ -892,20 +914,6 @@ static void smp_init_secondary(void) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/* - * Activate a secondary processor. - */ -static void __no_sanitize_address smp_start_secondary(void *cpuvoid) -{ - S390_lowcore.restart_stack = (unsigned long) restart_stack; - S390_lowcore.restart_fn = (unsigned long) do_restart; - S390_lowcore.restart_data = 0; - S390_lowcore.restart_source = -1UL; - __ctl_load(S390_lowcore.cregs_save_area, 0, 15); - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - call_on_stack_noreturn(smp_init_secondary, S390_lowcore.kernel_stack); -} - /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { @@ -1055,7 +1063,7 @@ static ssize_t cpu_configure_store(struct device *dev, return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ @@ -1104,7 +1112,7 @@ static ssize_t cpu_configure_store(struct device *dev, } out: mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return rc ? rc : count; } static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S new file mode 100644 index 000000000000..868e4a604110 --- /dev/null +++ b/arch/s390/kernel/text_amode31.S @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Code that needs to run below 2 GB. + * + * Copyright IBM Corp. 2019 + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/sigp.h> + + .section .amode31.text,"ax" +/* + * Simplified version of expoline thunk. The normal thunks can not be used here, + * because they might be more than 2 GB away, and not reachable by the relative + * branch. No comdat, exrl, etc. optimizations used here, because it only + * affects a few functions that are not performance-relevant. + */ + .macro BR_EX_AMODE31_r14 + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 + .endm + +/* + * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode) + */ +ENTRY(_diag14_amode31) + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + lhi %r5,-EIO + sam31 + diag %r1,%r2,0x14 +.Ldiag14_ex: + ipm %r5 + srl %r5,28 +.Ldiag14_fault: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault) +ENDPROC(_diag14_amode31) + +/* + * int _diag210_amode31(struct diag210 *addr) + */ +ENTRY(_diag210_amode31) + lgr %r1,%r2 + lhi %r2,-1 + sam31 + diag %r1,%r0,0x210 +.Ldiag210_ex: + ipm %r2 + srl %r2,28 +.Ldiag210_fault: + sam64 + lgfr %r2,%r2 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault) +ENDPROC(_diag210_amode31) + +/* + * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode) + */ +ENTRY(_diag26c_amode31) + lghi %r5,-EOPNOTSUPP + sam31 + diag %r2,%r4,0x26c +.Ldiag26c_ex: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex) +ENDPROC(_diag26c_amode31) + +/* + * void _diag0c_amode31(struct hypfs_diag0c_entry *entry) + */ +ENTRY(_diag0c_amode31) + sam31 + diag %r2,%r2,0x0c + sam64 + BR_EX_AMODE31_r14 +ENDPROC(_diag0c_amode31) + +/* + * void _diag308_reset_amode31(void) + * + * Calls diag 308 subcode 1 and continues execution + */ +ENTRY(_diag308_reset_amode31) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + lg %r2,0(%r4) # Disable lowcore protection + nilh %r2,0xefff + larl %r4,.Lctlreg0 + stg %r2,0(%r4) + lctlg %c0,%c0,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lprefix # Save prefix register + stpx 0(%r4) + larl %r4,.Lprefix_zero # Set prefix register to 0 + spx 0(%r4) + larl %r4,.Lcontinue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0 + larl %r3,.Lrestart_diag308_psw + og %r4,0(%r3) # Save PSW + lghi %r3,0 + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + lghi %r0,0 + diag %r0,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,.Lprefix # Restore prefix register + spx 0(%r4) + larl %r4,.Lcontinue_psw # Restore PSW flags + larl %r2,.Lcontinue + stg %r2,8(%r4) + lpswe 0(%r4) +.Lcontinue: + BR_EX_AMODE31_r14 +ENDPROC(_diag308_reset_amode31) + + .section .amode31.data,"aw",@progbits +.align 8 +.Lrestart_diag308_psw: + .long 0x00080000,0x80000000 + +.align 8 +.Lcontinue_psw: + .quad 0,0 + +.align 8 +.Lctlreg0: + .quad 0 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 +.Lprefix: + .long 0 +.Lprefix_zero: + .long 0 diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 26aa2614ee35..d2458a29618f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -406,7 +406,7 @@ static ssize_t dispatching_store(struct device *dev, if (val != 0 && val != 1) return -EINVAL; rc = 0; - get_online_cpus(); + cpus_read_lock(); mutex_lock(&smp_cpu_state_mutex); if (cpu_management == val) goto out; @@ -417,7 +417,7 @@ static ssize_t dispatching_store(struct device *dev, topology_expect_change(); out: mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); + cpus_read_unlock(); return rc ? rc : count; } static DEVICE_ATTR_RW(dispatching); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 76947275fe8b..bcefc2173de4 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -291,7 +291,7 @@ static void __init test_monitor_call(void) void __init trap_init(void) { - sort_extable(__start_dma_ex_table, __stop_dma_ex_table); + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); local_mcck_enable(); test_monitor_call(); } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index aeb0a15bcbb7..5a656c7b7a67 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -51,24 +51,9 @@ void __init setup_uv(void) { unsigned long uv_stor_base; - /* - * keep these conditions in line with has_uv_sec_stor_limit() - */ if (!is_prot_virt_host()) return; - if (is_prot_virt_guest()) { - prot_virt_host = 0; - pr_warn("Protected virtualization not available in protected guests."); - return; - } - - if (!test_facility(158)) { - prot_virt_host = 0; - pr_warn("Protected virtualization not supported by the hardware."); - return; - } - uv_stor_base = (unsigned long)memblock_alloc_try_nid( uv_info.uv_base_stor_len, SZ_1M, SZ_2G, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 3457dcf10396..e3e6ac5686df 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -36,6 +36,7 @@ CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 2a2092ce19f1..6568de236701 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -39,6 +39,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 4c0e19145cc6..63bdb9e1bfc1 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/ftrace.lds.h> /* * Put .bss..swapper_pg_dir as the first thing in .bss. This will @@ -46,6 +47,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) @@ -71,6 +73,13 @@ SECTIONS RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) BOOT_DATA_PRESERVED + . = ALIGN(8); + .amode31.refs : { + _start_amode31_refs = .; + *(.amode31.refs) + _end_amode31_refs = .; + } + _edata = .; /* End of data section */ /* will be freed after init */ @@ -136,6 +145,32 @@ SECTIONS BOOT_DATA + /* + * .amode31 section for code, data, ex_table that need to stay + * below 2 GB, even when the kernel is relocated above 2 GB. + */ + . = ALIGN(PAGE_SIZE); + _samode31 = .; + .amode31.text : { + _stext_amode31 = .; + *(.amode31.text) + *(.amode31.text.*_indirect_*) + . = ALIGN(PAGE_SIZE); + _etext_amode31 = .; + } + . = ALIGN(16); + .amode31.ex_table : { + _start_amode31_ex_table = .; + KEEP(*(.amode31.ex_table)) + _stop_amode31_ex_table = .; + } + . = ALIGN(PAGE_SIZE); + .amode31.data : { + *(.amode31.data) + } + . = ALIGN(PAGE_SIZE); + _eamode31 = .; + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) |