From 22155914b66b348b7113a0b3baf96a72bd3f643d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 8 Dec 2006 15:53:49 +0100 Subject: [S390] uaccess_pt: add missing down_read() and convert to is_init(). Doesn't seem to be a good idea to duplicate code :) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess_pt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 8741bdc09299..633249c3ba91 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -8,8 +8,8 @@ */ #include -#include #include +#include #include static inline int __handle_fault(struct mm_struct *mm, unsigned long address, @@ -60,8 +60,9 @@ out: out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); + down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", current->comm); -- cgit v1.2.3 From 34249d0f9243fce773c2fa352934ba108320e234 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 8 Dec 2006 15:54:18 +0100 Subject: [S390] runtime switch for qdio performance statistics Remove CONFIG_QETH_PERF_STATS and use a sysfs attribute instead. We want to have the ability to turn the statistics on/off at runtime. Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 8 -- arch/s390/defconfig | 1 - drivers/s390/cio/qdio.c | 226 ++++++++++++++++++++++++++---------------------- drivers/s390/cio/qdio.h | 28 +++--- 4 files changed, 135 insertions(+), 128 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 583d9ff0a571..a08e9100e7e4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -258,14 +258,6 @@ config QDIO If unsure, say Y. -config QDIO_PERF_STATS - bool "Performance statistics in /proc" - depends on QDIO - help - Say Y here to get performance statistics in /proc/qdio_perf - - If unsure, say N. - config QDIO_DEBUG bool "Extended debugging information" depends on QDIO diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7cd51e73e274..a6ec919ba83f 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -134,7 +134,6 @@ CONFIG_RESOURCES_64BIT=y # CONFIG_MACHCHK_WARNING=y CONFIG_QDIO=y -# CONFIG_QDIO_PERF_STATS is not set # CONFIG_QDIO_DEBUG is not set # diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index d066dbf2c65d..9d4ea449a608 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -46,6 +46,7 @@ #include #include +#include #include #include "cio.h" @@ -65,12 +66,12 @@ MODULE_LICENSE("GPL"); /******************** HERE WE GO ***********************************/ static const char version[] = "QDIO base support version 2"; +extern struct bus_type ccw_bus_type; -#ifdef QDIO_PERFORMANCE_STATS +static int qdio_performance_stats = 0; static int proc_perf_file_registration; static unsigned long i_p_c, i_p_nc, o_p_c, o_p_nc, ii_p_c, ii_p_nc; static struct qdio_perf_stats perf_stats; -#endif /* QDIO_PERFORMANCE_STATS */ static int hydra_thinints; static int is_passthrough = 0; @@ -275,9 +276,8 @@ qdio_siga_sync(struct qdio_q *q, unsigned int gpr2, QDIO_DBF_TEXT4(0,trace,"sigasync"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.siga_syncs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.siga_syncs++; cc = do_siga_sync(q->schid, gpr2, gpr3); if (cc) @@ -322,9 +322,8 @@ qdio_siga_output(struct qdio_q *q) __u32 busy_bit; __u64 start_time=0; -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.siga_outs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.siga_outs++; QDIO_DBF_TEXT4(0,trace,"sigaout"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); @@ -358,9 +357,8 @@ qdio_siga_input(struct qdio_q *q) QDIO_DBF_TEXT4(0,trace,"sigain"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.siga_ins++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.siga_ins++; cc = do_siga_input(q->schid, q->mask); @@ -954,9 +952,8 @@ __qdio_outbound_processing(struct qdio_q *q) if (unlikely(qdio_reserve_q(q))) { qdio_release_q(q); -#ifdef QDIO_PERFORMANCE_STATS - o_p_c++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + o_p_c++; /* as we're sissies, we'll check next time */ if (likely(!atomic_read(&q->is_in_shutdown))) { qdio_mark_q(q); @@ -964,10 +961,10 @@ __qdio_outbound_processing(struct qdio_q *q) } return; } -#ifdef QDIO_PERFORMANCE_STATS - o_p_nc++; - perf_stats.tl_runs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + o_p_nc++; + perf_stats.tl_runs++; + } /* see comment in qdio_kick_outbound_q */ siga_attempts=atomic_read(&q->busy_siga_counter); @@ -1142,15 +1139,16 @@ qdio_has_inbound_q_moved(struct qdio_q *q) { int i; -#ifdef QDIO_PERFORMANCE_STATS static int old_pcis=0; static int old_thinints=0; - if ((old_pcis==perf_stats.pcis)&&(old_thinints==perf_stats.thinints)) - perf_stats.start_time_inbound=NOW; - else - old_pcis=perf_stats.pcis; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + if ((old_pcis==perf_stats.pcis)&& + (old_thinints==perf_stats.thinints)) + perf_stats.start_time_inbound=NOW; + else + old_pcis=perf_stats.pcis; + } i=qdio_get_inbound_buffer_frontier(q); if ( (i!=GET_SAVED_FRONTIER(q)) || @@ -1340,10 +1338,10 @@ qdio_kick_inbound_handler(struct qdio_q *q) q->siga_error=0; q->error_status_flags=0; -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.inbound_time+=NOW-perf_stats.start_time_inbound; - perf_stats.inbound_cnt++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + perf_stats.inbound_time+=NOW-perf_stats.start_time_inbound; + perf_stats.inbound_cnt++; + } } static inline void @@ -1363,9 +1361,8 @@ __tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set) */ if (unlikely(qdio_reserve_q(q))) { qdio_release_q(q); -#ifdef QDIO_PERFORMANCE_STATS - ii_p_c++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + ii_p_c++; /* * as we might just be about to stop polling, we make * sure that we check again at least once more @@ -1373,9 +1370,8 @@ __tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set) tiqdio_sched_tl(); return; } -#ifdef QDIO_PERFORMANCE_STATS - ii_p_nc++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + ii_p_nc++; if (unlikely(atomic_read(&q->is_in_shutdown))) { qdio_unmark_q(q); goto out; @@ -1416,11 +1412,11 @@ __tiqdio_inbound_processing(struct qdio_q *q, int spare_ind_was_set) irq_ptr = (struct qdio_irq*)q->irq_ptr; for (i=0;ino_output_qs;i++) { oq = irq_ptr->output_qs[i]; -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.tl_runs--; -#endif /* QDIO_PERFORMANCE_STATS */ - if (!qdio_is_outbound_q_done(oq)) + if (!qdio_is_outbound_q_done(oq)) { + if (qdio_performance_stats) + perf_stats.tl_runs--; __qdio_outbound_processing(oq); + } } } @@ -1457,9 +1453,8 @@ __qdio_inbound_processing(struct qdio_q *q) if (unlikely(qdio_reserve_q(q))) { qdio_release_q(q); -#ifdef QDIO_PERFORMANCE_STATS - i_p_c++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + i_p_c++; /* as we're sissies, we'll check next time */ if (likely(!atomic_read(&q->is_in_shutdown))) { qdio_mark_q(q); @@ -1467,10 +1462,10 @@ __qdio_inbound_processing(struct qdio_q *q) } return; } -#ifdef QDIO_PERFORMANCE_STATS - i_p_nc++; - perf_stats.tl_runs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + i_p_nc++; + perf_stats.tl_runs++; + } again: if (qdio_has_inbound_q_moved(q)) { @@ -1516,9 +1511,8 @@ tiqdio_reset_processing_state(struct qdio_q *q, int q_laps) if (unlikely(qdio_reserve_q(q))) { qdio_release_q(q); -#ifdef QDIO_PERFORMANCE_STATS - ii_p_c++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + ii_p_c++; /* * as we might just be about to stop polling, we make * sure that we check again at least once more @@ -1609,9 +1603,8 @@ tiqdio_tl(unsigned long data) { QDIO_DBF_TEXT4(0,trace,"iqdio_tl"); -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.tl_runs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.tl_runs++; tiqdio_inbound_checks(); } @@ -1918,10 +1911,10 @@ tiqdio_thinint_handler(void) { QDIO_DBF_TEXT4(0,trace,"thin_int"); -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.thinints++; - perf_stats.start_time_inbound=NOW; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + perf_stats.thinints++; + perf_stats.start_time_inbound=NOW; + } /* SVS only when needed: * issue SVS to benefit from iqdio interrupt avoidance @@ -1976,18 +1969,17 @@ qdio_handle_pci(struct qdio_irq *irq_ptr) int i; struct qdio_q *q; -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.pcis++; - perf_stats.start_time_inbound=NOW; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + perf_stats.pcis++; + perf_stats.start_time_inbound=NOW; + } for (i=0;ino_input_qs;i++) { q=irq_ptr->input_qs[i]; if (q->is_input_q&QDIO_FLAG_NO_INPUT_INTERRUPT_CONTEXT) qdio_mark_q(q); else { -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.tl_runs--; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.tl_runs--; __qdio_inbound_processing(q); } } @@ -1995,11 +1987,10 @@ qdio_handle_pci(struct qdio_irq *irq_ptr) return; for (i=0;ino_output_qs;i++) { q=irq_ptr->output_qs[i]; -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.tl_runs--; -#endif /* QDIO_PERFORMANCE_STATS */ if (qdio_is_outbound_q_done(q)) continue; + if (qdio_performance_stats) + perf_stats.tl_runs--; if (!irq_ptr->sync_done_on_outb_pcis) SYNC_MEMORY; __qdio_outbound_processing(q); @@ -3460,19 +3451,18 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; /* This is the outbound handling of queues */ -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.start_time_outbound=NOW; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.start_time_outbound=NOW; qdio_do_qdio_fill_output(q,qidx,count,buffers); used_elements=atomic_add_return(count, &q->number_of_buffers_used) - count; if (callflags&QDIO_FLAG_DONT_SIGA) { -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.outbound_time+=NOW-perf_stats.start_time_outbound; - perf_stats.outbound_cnt++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + perf_stats.outbound_time+=NOW-perf_stats.start_time_outbound; + perf_stats.outbound_cnt++; + } return; } if (q->is_iqdio_q) { @@ -3502,9 +3492,8 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, qdio_kick_outbound_q(q); } else { QDIO_DBF_TEXT3(0,trace, "fast-req"); -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.fast_reqs++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) + perf_stats.fast_reqs++; } } /* @@ -3515,10 +3504,10 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, __qdio_outbound_processing(q); } -#ifdef QDIO_PERFORMANCE_STATS - perf_stats.outbound_time+=NOW-perf_stats.start_time_outbound; - perf_stats.outbound_cnt++; -#endif /* QDIO_PERFORMANCE_STATS */ + if (qdio_performance_stats) { + perf_stats.outbound_time+=NOW-perf_stats.start_time_outbound; + perf_stats.outbound_cnt++; + } } /* count must be 1 in iqdio */ @@ -3576,7 +3565,6 @@ do_QDIO(struct ccw_device *cdev,unsigned int callflags, return 0; } -#ifdef QDIO_PERFORMANCE_STATS static int qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset, int buffer_length, int *eof, void *data) @@ -3592,29 +3580,29 @@ qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset, _OUTP_IT("i_p_nc/c=%lu/%lu\n",i_p_nc,i_p_c); _OUTP_IT("ii_p_nc/c=%lu/%lu\n",ii_p_nc,ii_p_c); _OUTP_IT("o_p_nc/c=%lu/%lu\n",o_p_nc,o_p_c); - _OUTP_IT("Number of tasklet runs (total) : %u\n", + _OUTP_IT("Number of tasklet runs (total) : %lu\n", perf_stats.tl_runs); _OUTP_IT("\n"); - _OUTP_IT("Number of SIGA sync's issued : %u\n", + _OUTP_IT("Number of SIGA sync's issued : %lu\n", perf_stats.siga_syncs); - _OUTP_IT("Number of SIGA in's issued : %u\n", + _OUTP_IT("Number of SIGA in's issued : %lu\n", perf_stats.siga_ins); - _OUTP_IT("Number of SIGA out's issued : %u\n", + _OUTP_IT("Number of SIGA out's issued : %lu\n", perf_stats.siga_outs); - _OUTP_IT("Number of PCIs caught : %u\n", + _OUTP_IT("Number of PCIs caught : %lu\n", perf_stats.pcis); - _OUTP_IT("Number of adapter interrupts caught : %u\n", + _OUTP_IT("Number of adapter interrupts caught : %lu\n", perf_stats.thinints); - _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %u\n", + _OUTP_IT("Number of fast requeues (outg. SBALs w/o SIGA) : %lu\n", perf_stats.fast_reqs); _OUTP_IT("\n"); - _OUTP_IT("Total time of all inbound actions (us) incl. UL : %u\n", + _OUTP_IT("Total time of all inbound actions (us) incl. UL : %lu\n", perf_stats.inbound_time); - _OUTP_IT("Number of inbound transfers : %u\n", + _OUTP_IT("Number of inbound transfers : %lu\n", perf_stats.inbound_cnt); - _OUTP_IT("Total time of all outbound do_QDIOs (us) : %u\n", + _OUTP_IT("Total time of all outbound do_QDIOs (us) : %lu\n", perf_stats.outbound_time); - _OUTP_IT("Number of do_QDIOs outbound : %u\n", + _OUTP_IT("Number of do_QDIOs outbound : %lu\n", perf_stats.outbound_cnt); _OUTP_IT("\n"); @@ -3622,12 +3610,10 @@ qdio_perf_procfile_read(char *buffer, char **buffer_location, off_t offset, } static struct proc_dir_entry *qdio_perf_proc_file; -#endif /* QDIO_PERFORMANCE_STATS */ static void qdio_add_procfs_entry(void) { -#ifdef QDIO_PERFORMANCE_STATS proc_perf_file_registration=0; qdio_perf_proc_file=create_proc_entry(QDIO_PERF, S_IFREG|0444,&proc_root); @@ -3639,20 +3625,58 @@ qdio_add_procfs_entry(void) QDIO_PRINT_WARN("was not able to register perf. " \ "proc-file (%i).\n", proc_perf_file_registration); -#endif /* QDIO_PERFORMANCE_STATS */ } static void qdio_remove_procfs_entry(void) { -#ifdef QDIO_PERFORMANCE_STATS perf_stats.tl_runs=0; if (!proc_perf_file_registration) /* means if it went ok earlier */ remove_proc_entry(QDIO_PERF,&proc_root); -#endif /* QDIO_PERFORMANCE_STATS */ } +/** + * attributes in sysfs + *****************************************************************************/ + +static ssize_t +qdio_performance_stats_show(struct bus_type *bus, char *buf) +{ + return sprintf(buf, "%i\n", qdio_performance_stats ? 1 : 0); +} + +static ssize_t +qdio_performance_stats_store(struct bus_type *bus, const char *buf, size_t count) +{ + char *tmp; + int i; + + i = simple_strtoul(buf, &tmp, 16); + if ((i == 0) || (i == 1)) { + if (i == qdio_performance_stats) + return count; + qdio_performance_stats = i; + if (i==0) { + /* reset perf. stat. info */ + i_p_nc = 0; + i_p_c = 0; + ii_p_nc = 0; + ii_p_c = 0; + o_p_nc = 0; + o_p_c = 0; + memset(&perf_stats, 0, sizeof(struct qdio_perf_stats)); + } + } else { + QDIO_PRINT_WARN("QDIO performance_stats: write 0 or 1 to this file!\n"); + return -EINVAL; + } + return count; +} + +static BUS_ATTR(qdio_performance_stats, 0644, qdio_performance_stats_show, + qdio_performance_stats_store); + static void tiqdio_register_thinints(void) { @@ -3697,6 +3721,7 @@ qdio_release_qdio_memory(void) kfree(indicators); } + static void qdio_unregister_dbf_views(void) { @@ -3798,9 +3823,7 @@ static int __init init_QDIO(void) { int res; -#ifdef QDIO_PERFORMANCE_STATS void *ptr; -#endif /* QDIO_PERFORMANCE_STATS */ printk("qdio: loading %s\n",version); @@ -3813,13 +3836,12 @@ init_QDIO(void) return res; QDIO_DBF_TEXT0(0,setup,"initQDIO"); + res = bus_create_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); -#ifdef QDIO_PERFORMANCE_STATS - memset((void*)&perf_stats,0,sizeof(perf_stats)); + memset((void*)&perf_stats,0,sizeof(perf_stats)); QDIO_DBF_TEXT0(0,setup,"perfstat"); ptr=&perf_stats; QDIO_DBF_HEX0(0,setup,&ptr,sizeof(void*)); -#endif /* QDIO_PERFORMANCE_STATS */ qdio_add_procfs_entry(); @@ -3843,7 +3865,7 @@ cleanup_QDIO(void) qdio_release_qdio_memory(); qdio_unregister_dbf_views(); mempool_destroy(qdio_mempool_scssc); - + bus_remove_file(&ccw_bus_type, &bus_attr_qdio_performance_stats); printk("qdio: %s: module removed\n",version); } diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 42927c1b7451..ec9af72b2afc 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -12,10 +12,6 @@ #endif /* CONFIG_QDIO_DEBUG */ #define QDIO_USE_PROCESSING_STATE -#ifdef CONFIG_QDIO_PERF_STATS -#define QDIO_PERFORMANCE_STATS -#endif /* CONFIG_QDIO_PERF_STATS */ - #define QDIO_MINIMAL_BH_RELIEF_TIME 16 #define QDIO_TIMER_POLL_VALUE 1 #define IQDIO_TIMER_POLL_VALUE 1 @@ -409,25 +405,23 @@ do_clear_global_summary(void) #define CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS 0x08 #define CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS 0x04 -#ifdef QDIO_PERFORMANCE_STATS struct qdio_perf_stats { - unsigned int tl_runs; + unsigned long tl_runs; - unsigned int siga_outs; - unsigned int siga_ins; - unsigned int siga_syncs; - unsigned int pcis; - unsigned int thinints; - unsigned int fast_reqs; + unsigned long siga_outs; + unsigned long siga_ins; + unsigned long siga_syncs; + unsigned long pcis; + unsigned long thinints; + unsigned long fast_reqs; __u64 start_time_outbound; - unsigned int outbound_cnt; - unsigned int outbound_time; + unsigned long outbound_cnt; + unsigned long outbound_time; __u64 start_time_inbound; - unsigned int inbound_cnt; - unsigned int inbound_time; + unsigned long inbound_cnt; + unsigned long inbound_time; }; -#endif /* QDIO_PERFORMANCE_STATS */ /* unlikely as the later the better */ #define SYNC_MEMORY if (unlikely(q->siga_sync)) qdio_siga_sync_q(q) -- cgit v1.2.3 From f4eb07c17df2e6cf9bd58bfcd9cc9e05e9489d07 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 8 Dec 2006 15:56:07 +0100 Subject: [S390] Virtual memmap for s390. Virtual memmap support for s390. Inspired by the ia64 implementation. Unlike ia64 we need a mechanism which allows us to dynamically attach shared memory regions. These memory regions are accessed via the dcss device driver. dcss implements the 'direct_access' operation, which requires struct pages for every single shared page. Therefore this implementation provides an interface to attach/detach shared memory: int add_shared_memory(unsigned long start, unsigned long size); int remove_shared_memory(unsigned long start, unsigned long size); The purpose of the add_shared_memory function is to add the given memory range to the 1:1 mapping and to make sure that the corresponding range in the vmemmap is backed with physical pages. It also initialises the new struct pages. remove_shared_memory in turn only invalidates the page table entries in the 1:1 mapping. The page tables and the memory used for struct pages in the vmemmap are currently not freed. They will be reused when the next segment will be attached. Given that the maximum size of a shared memory region is 2GB and in addition all regions must reside below 2GB this is not too much of a restriction, but there is room for improvement. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 + arch/s390/kernel/setup.c | 2 +- arch/s390/mm/Makefile | 2 +- arch/s390/mm/extmem.c | 106 ++++--------- arch/s390/mm/init.c | 163 +++++-------------- arch/s390/mm/vmem.c | 381 +++++++++++++++++++++++++++++++++++++++++++++ include/asm-s390/page.h | 22 ++- include/asm-s390/pgalloc.h | 3 + include/asm-s390/pgtable.h | 16 +- 9 files changed, 488 insertions(+), 210 deletions(-) create mode 100644 arch/s390/mm/vmem.c (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a08e9100e7e4..f12ca8fba71b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -235,6 +235,9 @@ config WARN_STACK_SIZE source "mm/Kconfig" +config HOLES_IN_ZONE + def_bool y + comment "I/O subsystem configuration" config MACHCHK_WARNING diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b928fecdc743..b8a1ce215142 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -64,7 +64,7 @@ unsigned int console_devno = -1; unsigned int console_irq = -1; unsigned long machine_flags = 0; -struct mem_chunk memory_chunk[MEMORY_CHUNKS]; +struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ unsigned long __initdata zholes_size[MAX_NR_ZONES]; static unsigned long __initdata memory_end; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index aa9a42b6e62d..8e09db1edbb9 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o ioremap.o extmem.o mmap.o +obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 9e9bc48463a5..775bf19e742b 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -237,65 +238,6 @@ query_segment_type (struct dcss_segment *seg) return rc; } -/* - * check if the given segment collides with guest storage. - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_storage(struct dcss_segment *seg) -{ - int i; - - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != CHUNK_READ_WRITE) - continue; - if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) - continue; - if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20) - < (seg->start_addr >> 20)) - continue; - return 1; - } - return 0; -} - -/* - * check if segment collides with other segments that are currently loaded - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_others (struct dcss_segment *seg) -{ - struct list_head *l; - struct dcss_segment *tmp; - - BUG_ON(!mutex_is_locked(&dcss_lock)); - list_for_each(l, &dcss_list) { - tmp = list_entry(l, struct dcss_segment, list); - if ((tmp->start_addr >> 20) > (seg->end >> 20)) - continue; - if ((tmp->end >> 20) < (seg->start_addr >> 20)) - continue; - if (seg == tmp) - continue; - return 1; - } - return 0; -} - -/* - * check if segment exceeds the kernel mapping range (detected or set via mem=) - * returns 1 if this is the case, 0 if segment fits into the range - */ -static inline int -segment_exceeds_range (struct dcss_segment *seg) -{ - int seg_last_pfn = (seg->end) >> PAGE_SHIFT; - if (seg_last_pfn > max_pfn) - return 1; - return 0; -} - /* * get info about a segment * possible return values: @@ -341,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = query_segment_type (seg); if (rc < 0) goto out_free; - if (segment_exceeds_range(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - exceeds" - " kernel mapping range\n",name); - rc = -ERANGE; + + rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + + switch (rc) { + case 0: + break; + case -ENOSPC: + PRINT_WARN("segment_load: not loading segment %s - overlaps " + "storage/segment\n", name); goto out_free; - } - if (segment_overlaps_storage(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " storage\n",name); - rc = -ENOSPC; + case -ERANGE: + PRINT_WARN("segment_load: not loading segment %s - exceeds " + "kernel mapping range\n", name); goto out_free; - } - if (segment_overlaps_others(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " other segments\n",name); - rc = -EBUSY; + default: + PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n", + name, rc); goto out_free; } + if (do_nonshared) dcss_command = DCSS_LOADNSR; else @@ -372,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = dcss_diag_translate_rc (seg->end); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &seg->start_addr, &seg->end); - goto out_free; + goto out_shared; } seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); @@ -391,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*)seg->start_addr, (void*)seg->end, segtype_string[seg->vm_segtype]); goto out; + out_shared: + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -530,12 +476,12 @@ segment_unload(char *name) "please report to linux390@de.ibm.com\n",name); goto out_unlock; } - if (atomic_dec_return(&seg->ref_count) == 0) { - list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); - kfree(seg); - } + if (atomic_dec_return(&seg->ref_count) != 0) + goto out_unlock; + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + kfree(seg); out_unlock: mutex_unlock(&dcss_lock); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e1881c31b1cb..5ea12a573cad 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -69,6 +69,8 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; while (i-- > 0) { + if (!pfn_valid(i)) + continue; page = pfn_to_page(i); total++; if (PageReserved(page)) @@ -84,67 +86,53 @@ void show_mem(void) printk("%d pages swap cached\n",cached); } +static void __init setup_ro_region(void) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pte_t new_pte; + unsigned long address, end; + + address = ((unsigned long)&__start_rodata) & PAGE_MASK; + end = PFN_ALIGN((unsigned long)&__end_rodata); + + for (; address < end; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + pmd = pmd_offset(pgd, address); + pte = pte_offset_kernel(pmd, address); + new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); + set_pte(pte, new_pte); + } +} + extern unsigned long __initdata zholes_size[]; +extern void vmem_map_init(void); /* * paging_init() sets up the page tables */ - -#ifndef CONFIG_64BIT void __init paging_init(void) { - pgd_t * pg_dir; - pte_t * pg_table; - pte_t pte; - int i; - unsigned long tmp; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; - static const int ssm_mask = 0x04000000L; - unsigned long ro_start_pfn, ro_end_pfn; + pgd_t *pg_dir; + int i; + unsigned long pgdir_k; + static const int ssm_mask = 0x04000000L; unsigned long zones_size[MAX_NR_ZONES]; + unsigned long dma_pfn, high_pfn; - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); - - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_DMA] = max_low_pfn; - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, - zholes_size); - - /* unmap whole virtual address space */ + pg_dir = swapper_pg_dir; - pg_dir = swapper_pg_dir; - +#ifdef CONFIG_64BIT + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE; for (i = 0; i < PTRS_PER_PGD; i++) - pmd_clear((pmd_t *) pg_dir++); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - while (pfn < max_low_pfn) { - /* - * pg_table is physical at this point - */ - pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - - pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table); - pg_dir++; - - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pg_table, pte); - pfn++; - } - } + pgd_clear(pg_dir + i); +#else + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; + for (i = 0; i < PTRS_PER_PGD; i++) + pmd_clear((pmd_t *)(pg_dir + i)); +#endif + vmem_map_init(); + setup_ro_region(); S390_lowcore.kernel_asce = pgdir_k; @@ -154,31 +142,9 @@ void __init paging_init(void) __ctl_load(pgdir_k, 13, 13); __raw_local_irq_ssm(ssm_mask); - local_flush_tlb(); -} - -#else /* CONFIG_64BIT */ - -void __init paging_init(void) -{ - pgd_t * pg_dir; - pmd_t * pm_dir; - pte_t * pt_dir; - pte_t pte; - int i,j,k; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | - _KERN_REGION_TABLE; - static const int ssm_mask = 0x04000000L; - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long dma_pfn, high_pfn; - unsigned long ro_start_pfn, ro_end_pfn; - memset(zones_size, 0, sizeof(zones_size)); dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; high_pfn = max_low_pfn; - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); if (dma_pfn > high_pfn) zones_size[ZONE_DMA] = high_pfn; @@ -190,56 +156,7 @@ void __init paging_init(void) /* Initialize mem_map[]. */ free_area_init_node(0, &contig_page_data, zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) { - - if (pfn >= max_low_pfn) { - pgd_clear(pg_dir); - continue; - } - - pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4); - pgd_populate(&init_mm, pg_dir, pm_dir); - - for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) { - if (pfn >= max_low_pfn) { - pmd_clear(pm_dir); - continue; - } - - pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); - - for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pt_dir, pte); - pfn++; - } - } - } - - S390_lowcore.kernel_asce = pgdir_k; - - /* enable virtual mapping in kernel mode */ - __ctl_load(pgdir_k, 1, 1); - __ctl_load(pgdir_k, 7, 7); - __ctl_load(pgdir_k, 13, 13); - __raw_local_irq_ssm(ssm_mask); - - local_flush_tlb(); } -#endif /* CONFIG_64BIT */ void __init mem_init(void) { @@ -269,6 +186,8 @@ void __init mem_init(void) printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&__start_rodata, PFN_ALIGN((unsigned long)&__end_rodata) - 1); + printk("Virtual memmap size: %ldk\n", + (max_pfn * sizeof(struct page)) >> 10); } void free_initmem(void) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c new file mode 100644 index 000000000000..7f2944d3ec2a --- /dev/null +++ b/arch/s390/mm/vmem.c @@ -0,0 +1,381 @@ +/* + * arch/s390/mm/vmem.c + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long vmalloc_end; +EXPORT_SYMBOL(vmalloc_end); + +static struct page *vmem_map; +static DEFINE_MUTEX(vmem_mutex); + +struct memory_segment { + struct list_head list; + unsigned long start; + unsigned long size; +}; + +static LIST_HEAD(mem_segs); + +void memmap_init(unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn) +{ + struct page *start, *end; + struct page *map_start, *map_end; + int i; + + start = pfn_to_page(start_pfn); + end = start + size; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + unsigned long cstart, cend; + + cstart = PFN_DOWN(memory_chunk[i].addr); + cend = cstart + PFN_DOWN(memory_chunk[i].size); + + map_start = mem_map + cstart; + map_end = mem_map + cend; + + if (map_start < start) + map_start = start; + if (map_end > end) + map_end = end; + + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) + / sizeof(struct page); + map_end += ((PFN_ALIGN((unsigned long) map_end) + - (unsigned long) map_end) + / sizeof(struct page)); + + if (map_start < map_end) + memmap_init_zone((unsigned long)(map_end - map_start), + nid, zone, page_to_pfn(map_start)); + } +} + +static inline void *vmem_alloc_pages(unsigned int order) +{ + if (slab_is_available()) + return (void *)__get_free_pages(GFP_KERNEL, order); + return alloc_bootmem_pages((1 << order) * PAGE_SIZE); +} + +static inline pmd_t *vmem_pmd_alloc(void) +{ + pmd_t *pmd; + int i; + + pmd = vmem_alloc_pages(PMD_ALLOC_ORDER); + if (!pmd) + return NULL; + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_clear(pmd + i); + return pmd; +} + +static inline pte_t *vmem_pte_alloc(void) +{ + pte_t *pte; + pte_t empty_pte; + int i; + + pte = vmem_alloc_pages(PTE_ALLOC_ORDER); + if (!pte) + return NULL; + pte_val(empty_pte) = _PAGE_TYPE_EMPTY; + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(pte + i, empty_pte); + return pte; +} + +/* + * Add a physical memory range to the 1:1 mapping. + */ +static int vmem_add_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + ret = 0; +out: + flush_tlb_kernel_range(start, start + size); + return ret; +} + +/* + * Remove a physical memory range from the 1:1 mapping. + * Currently only invalidates page table entries. + */ +static void vmem_remove_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + + pte_val(pte) = _PAGE_TYPE_EMPTY; + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) + continue; + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) + continue; + pt_dir = pte_offset_kernel(pm_dir, address); + set_pte(pt_dir, pte); + } + flush_tlb_kernel_range(start, start + size); +} + +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +static int vmem_add_mem_map(unsigned long start, unsigned long size) +{ + unsigned long address, start_addr, end_addr; + struct page *map_start, *map_end; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + map_start = vmem_map + PFN_DOWN(start); + map_end = vmem_map + PFN_DOWN(start + size); + + start_addr = (unsigned long) map_start & PAGE_MASK; + end_addr = PFN_ALIGN((unsigned long) map_end); + + for (address = start_addr; address < end_addr; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + unsigned long new_page; + + new_page =__pa(vmem_alloc_pages(0)); + if (!new_page) + goto out; + pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + } + ret = 0; +out: + flush_tlb_kernel_range(start_addr, end_addr); + return ret; +} + +static int vmem_add_mem(unsigned long start, unsigned long size) +{ + int ret; + + ret = vmem_add_range(start, size); + if (ret) + return ret; + return vmem_add_mem_map(start, size); +} + +/* + * Add memory segment to the segment list if it doesn't overlap with + * an already present segment. + */ +static int insert_memory_segment(struct memory_segment *seg) +{ + struct memory_segment *tmp; + + if (PFN_DOWN(seg->start + seg->size) > max_pfn || + seg->start + seg->size < seg->start) + return -ERANGE; + + list_for_each_entry(tmp, &mem_segs, list) { + if (seg->start >= tmp->start + tmp->size) + continue; + if (seg->start + seg->size <= tmp->start) + continue; + return -ENOSPC; + } + list_add(&seg->list, &mem_segs); + return 0; +} + +/* + * Remove memory segment from the segment list. + */ +static void remove_memory_segment(struct memory_segment *seg) +{ + list_del(&seg->list); +} + +static void __remove_shared_memory(struct memory_segment *seg) +{ + remove_memory_segment(seg); + vmem_remove_range(seg->start, seg->size); +} + +int remove_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + + ret = -ENOENT; + list_for_each_entry(seg, &mem_segs, list) { + if (seg->start == start && seg->size == size) + break; + } + + if (seg->start != start || seg->size != size) + goto out; + + ret = 0; + __remove_shared_memory(seg); + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +int add_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + struct page *page; + unsigned long pfn, num_pfn, end_pfn; + int ret; + + mutex_lock(&vmem_mutex); + ret = -ENOMEM; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + goto out; + seg->start = start; + seg->size = size; + + ret = insert_memory_segment(seg); + if (ret) + goto out_free; + + ret = vmem_add_mem(start, size); + if (ret) + goto out_remove; + + pfn = PFN_DOWN(start); + num_pfn = PFN_DOWN(size); + end_pfn = pfn + num_pfn; + + page = pfn_to_page(pfn); + memset(page, 0, num_pfn * sizeof(struct page)); + + for (; pfn < end_pfn; pfn++) { + page = pfn_to_page(pfn); + init_page_count(page); + reset_page_mapcount(page); + SetPageReserved(page); + INIT_LIST_HEAD(&page->lru); + } + goto out; + +out_remove: + __remove_shared_memory(seg); +out_free: + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +/* + * map whole physical memory to virtual memory (identity mapping) + */ +void __init vmem_map_init(void) +{ + unsigned long map_size; + int i; + + map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page); + vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size); + vmem_map = (struct page *) vmalloc_end; + NODE_DATA(0)->node_mem_map = vmem_map; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) + vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size); +} + +/* + * Convert memory chunk array to a memory segment list so there is a single + * list that contains both r/w memory and shared memory segments. + */ +static int __init vmem_convert_memory_chunk(void) +{ + struct memory_segment *seg; + int i; + + mutex_lock(&vmem_mutex); + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (!memory_chunk[i].size) + continue; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + panic("Out of memory...\n"); + seg->start = memory_chunk[i].addr; + seg->size = memory_chunk[i].size; + insert_memory_segment(seg); + } + mutex_unlock(&vmem_mutex); + return 0; +} + +core_initcall(vmem_convert_memory_chunk); diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index 363ea761d5ee..05ea6f172786 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -127,6 +127,26 @@ page_get_storage_key(unsigned long addr) return skey; } +extern unsigned long max_pfn; + +static inline int pfn_valid(unsigned long pfn) +{ + unsigned long dummy; + int ccode; + + if (pfn >= max_pfn) + return 0; + + asm volatile( + " lra %0,0(%2)\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (dummy), "=d" (ccode) + : "a" (pfn << PAGE_SHIFT) + : "cc"); + return !ccode; +} + #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ @@ -138,8 +158,6 @@ page_get_storage_key(unsigned long addr) #define __va(x) (void *)(unsigned long)(x) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) - -#define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 28619de5ecae..0707a7e2fc16 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -25,8 +25,11 @@ extern void diag10(unsigned long addr); * Page allocation orders. */ #ifndef __s390x__ +# define PTE_ALLOC_ORDER 0 +# define PMD_ALLOC_ORDER 0 # define PGD_ALLOC_ORDER 1 #else /* __s390x__ */ +# define PTE_ALLOC_ORDER 0 # define PMD_ALLOC_ORDER 2 # define PGD_ALLOC_ORDER 2 #endif /* __s390x__ */ diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 2d968a69ed1f..ae61aca5d483 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -107,23 +107,25 @@ extern char empty_zero_page[PAGE_SIZE]; * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. ;) */ +extern unsigned long vmalloc_end; #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ & ~(VMALLOC_OFFSET-1)) +#define VMALLOC_END vmalloc_end /* * We need some free virtual space to be able to do vmalloc. * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc * area. On a machine with 2GB memory we make sure that we * have at least 128MB free space for vmalloc. On a machine - * with 4TB we make sure we have at least 1GB. + * with 4TB we make sure we have at least 128GB. */ #ifndef __s390x__ #define VMALLOC_MIN_SIZE 0x8000000UL -#define VMALLOC_END 0x80000000UL +#define VMALLOC_END_INIT 0x80000000UL #else /* __s390x__ */ -#define VMALLOC_MIN_SIZE 0x40000000UL -#define VMALLOC_END 0x40000000000UL +#define VMALLOC_MIN_SIZE 0x2000000000UL +#define VMALLOC_END_INIT 0x40000000000UL #endif /* __s390x__ */ /* @@ -815,11 +817,17 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define kern_addr_valid(addr) (1) +extern int add_shared_memory(unsigned long start, unsigned long size); +extern int remove_shared_memory(unsigned long start, unsigned long size); + /* * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) +#define __HAVE_ARCH_MEMMAP_INIT +extern void memmap_init(unsigned long, int, unsigned long, unsigned long); + #define __HAVE_ARCH_PTEP_ESTABLISH #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -- cgit v1.2.3 From 39b742f957a287a7514a8a35c9f516cdf30b9ff5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 8 Dec 2006 15:56:10 +0100 Subject: [S390] Use add_active_range() and free_area_init_nodes(). Size zones and holes in an architecture independent manner for s390. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 +++ arch/s390/kernel/setup.c | 53 +++++++++++------------------------------------- arch/s390/mm/init.c | 23 ++++++--------------- 3 files changed, 21 insertions(+), 58 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f12ca8fba71b..04f5a0230298 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -233,6 +233,9 @@ config WARN_STACK_SIZE This allows you to specify the maximum frame size a function may have without the compiler complaining about it. +config ARCH_POPULATES_NODE_MAP + def_bool y + source "mm/Kconfig" config HOLES_IN_ZONE diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b8a1ce215142..49ef206ec880 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -66,7 +66,6 @@ unsigned long machine_flags = 0; struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ -unsigned long __initdata zholes_size[MAX_NR_ZONES]; static unsigned long __initdata memory_end; /* @@ -354,21 +353,6 @@ void machine_power_off(void) */ void (*pm_power_off)(void) = machine_power_off; -static void __init -add_memory_hole(unsigned long start, unsigned long end) -{ - unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - - if (end <= dma_pfn) - zholes_size[ZONE_DMA] += end - start + 1; - else if (start > dma_pfn) - zholes_size[ZONE_NORMAL] += end - start + 1; - else { - zholes_size[ZONE_DMA] += dma_pfn - start + 1; - zholes_size[ZONE_NORMAL] += end - dma_pfn; - } -} - static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); @@ -521,7 +505,6 @@ setup_memory(void) { unsigned long bootmap_size; unsigned long start_pfn, end_pfn, init_pfn; - unsigned long last_rw_end; int i; /* @@ -577,39 +560,27 @@ setup_memory(void) /* * Register RAM areas with the bootmem allocator. */ - last_rw_end = start_pfn; for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long start_chunk, end_chunk; + unsigned long start_chunk, end_chunk, pfn; if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; - start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1); - start_chunk >>= PAGE_SHIFT; - end_chunk = (memory_chunk[i].addr + memory_chunk[i].size); - end_chunk >>= PAGE_SHIFT; - if (start_chunk < start_pfn) - start_chunk = start_pfn; - if (end_chunk > end_pfn) - end_chunk = end_pfn; - if (start_chunk < end_chunk) { - /* Initialize storage key for RAM pages */ - for (init_pfn = start_chunk ; init_pfn < end_chunk; - init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, - PAGE_DEFAULT_KEY); - free_bootmem(start_chunk << PAGE_SHIFT, - (end_chunk - start_chunk) << PAGE_SHIFT); - if (last_rw_end < start_chunk) - add_memory_hole(last_rw_end, start_chunk - 1); - last_rw_end = end_chunk; - } + start_chunk = PFN_DOWN(memory_chunk[i].addr); + end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1; + end_chunk = min(end_chunk, end_pfn); + if (start_chunk >= end_chunk) + continue; + add_active_range(0, start_chunk, end_chunk); + pfn = max(start_chunk, start_pfn); + for (; pfn <= end_chunk; pfn++) + page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); } psw_set_key(PAGE_DEFAULT_KEY); - if (last_rw_end < end_pfn - 1) - add_memory_hole(last_rw_end, end_pfn - 1); + free_bootmem_with_active_regions(0, max_pfn); + reserve_bootmem(0, PFN_PHYS(start_pfn)); /* * Reserve the bootmem bitmap itself as well. We do this in two diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 5ea12a573cad..aa39591ca130 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -106,8 +106,8 @@ static void __init setup_ro_region(void) } } -extern unsigned long __initdata zholes_size[]; extern void vmem_map_init(void); + /* * paging_init() sets up the page tables */ @@ -117,8 +117,7 @@ void __init paging_init(void) int i; unsigned long pgdir_k; static const int ssm_mask = 0x04000000L; - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long dma_pfn, high_pfn; + unsigned long max_zone_pfns[MAX_NR_ZONES]; pg_dir = swapper_pg_dir; @@ -142,20 +141,10 @@ void __init paging_init(void) __ctl_load(pgdir_k, 13, 13); __raw_local_irq_ssm(ssm_mask); - memset(zones_size, 0, sizeof(zones_size)); - dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - high_pfn = max_low_pfn; - - if (dma_pfn > high_pfn) - zones_size[ZONE_DMA] = high_pfn; - else { - zones_size[ZONE_DMA] = dma_pfn; - zones_size[ZONE_NORMAL] = high_pfn - dma_pfn; - } - - /* Initialize mem_map[]. */ - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + free_area_init_nodes(max_zone_pfns); } void __init mem_init(void) -- cgit v1.2.3 From 028d9b3cc62cb9dd31f1b5929edb3c23612cfccc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 8 Dec 2006 15:56:13 +0100 Subject: [S390] Poison init section before freeing it. The data patterns should allow us to easily tell if somebody accesses initdata/code after it was freed. Same code as on various other architectures. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index aa39591ca130..4bb21be3b007 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -187,6 +188,7 @@ void free_initmem(void) for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } -- cgit v1.2.3