diff options
author | Linus Torvalds | 2008-04-25 12:32:10 -0700 |
---|---|---|
committer | Linus Torvalds | 2008-04-25 12:32:10 -0700 |
commit | 4b7227ca321ccf447cdc04538687c895db8b77f5 (patch) | |
tree | 72712127fc56aa2579e8a1508998bcabf6bd6c60 /drivers | |
parent | 5dae61b80564a5583ff4b56e357bdbc733fddb76 (diff) | |
parent | 1775826ceec51187aa868406585799b7e76ffa7d (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next: (52 commits)
xen: add balloon driver
xen: allow compilation with non-flat memory
xen: fold xen_sysexit into xen_iret
xen: allow set_pte_at on init_mm to be lockless
xen: disable preemption during tlb flush
xen pvfb: Para-virtual framebuffer, keyboard and pointer driver
xen: Add compatibility aliases for frontend drivers
xen: Module autoprobing support for frontend drivers
xen blkfront: Delay wait for block devices until after the disk is added
xen/blkfront: use bdget_disk
xen: Make xen-blkfront write its protocol ABI to xenstore
xen: import arch generic part of xencomm
xen: make grant table arch portable
xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one
xen: make include/xen/page.h portable moving those definitions under asm dir
xen: add resend_irq_on_evtchn() definition into events.c
Xen: make events.c portable for ia64/xen support
xen: move events.c to drivers/xen for IA64/Xen support
xen: move features.c from arch/x86/xen/features.c to drivers/xen
xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 23 | ||||
-rw-r--r-- | drivers/input/Kconfig | 9 | ||||
-rw-r--r-- | drivers/input/Makefile | 2 | ||||
-rw-r--r-- | drivers/input/xen-kbdfront.c | 340 | ||||
-rw-r--r-- | drivers/net/xen-netfront.c | 2 | ||||
-rw-r--r-- | drivers/video/Kconfig | 14 | ||||
-rw-r--r-- | drivers/video/Makefile | 1 | ||||
-rw-r--r-- | drivers/video/xen-fbfront.c | 550 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 19 | ||||
-rw-r--r-- | drivers/xen/Makefile | 4 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 712 | ||||
-rw-r--r-- | drivers/xen/events.c | 674 | ||||
-rw-r--r-- | drivers/xen/features.c | 29 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 37 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 6 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 32 | ||||
-rw-r--r-- | drivers/xen/xencomm.c | 232 |
18 files changed, 2646 insertions, 42 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 3a0e3549739f..80f0ec91e2cf 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -97,4 +97,6 @@ source "drivers/dca/Kconfig" source "drivers/auxdisplay/Kconfig" source "drivers/uio/Kconfig" + +source "drivers/xen/Kconfig" endmenu diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9c6f3f99208d..d771da816d95 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -47,6 +47,7 @@ #include <xen/interface/grant_table.h> #include <xen/interface/io/blkif.h> +#include <xen/interface/io/protocols.h> #include <asm/xen/hypervisor.h> @@ -74,7 +75,6 @@ static struct block_device_operations xlvbd_block_fops; struct blkfront_info { struct xenbus_device *xbdev; - dev_t dev; struct gendisk *gd; int vdevice; blkif_vdev_t handle; @@ -88,6 +88,7 @@ struct blkfront_info struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; int feature_barrier; + int is_ready; /** * The number of people holding this device open. We won't allow a @@ -614,6 +615,12 @@ again: message = "writing event-channel"; goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } err = xenbus_transaction_end(xbt, 0); if (err) { @@ -833,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info) spin_unlock_irq(&blkif_io_lock); add_disk(info->gd); + + info->is_ready = 1; } /** @@ -896,7 +905,7 @@ static void backend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - bd = bdget(info->dev); + bd = bdget_disk(info->gd, 0); if (bd == NULL) xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); @@ -925,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev) return 0; } +static int blkfront_is_ready(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + + return info->is_ready; +} + static int blkif_open(struct inode *inode, struct file *filep) { struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; @@ -971,6 +987,7 @@ static struct xenbus_driver blkfront = { .remove = blkfront_remove, .resume = blkfront_resume, .otherend_changed = backend_changed, + .is_ready = blkfront_is_ready, }; static int __init xlblk_init(void) @@ -998,3 +1015,5 @@ module_exit(xlblk_exit); MODULE_DESCRIPTION("Xen virtual block device frontend"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); +MODULE_ALIAS("xen:vbd"); +MODULE_ALIAS("xenblk"); diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 9dea14db724c..5f9d860925a1 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -149,6 +149,15 @@ config INPUT_APMPOWER To compile this driver as a module, choose M here: the module will be called apm-power. +config XEN_KBDDEV_FRONTEND + tristate "Xen virtual keyboard and mouse support" + depends on XEN_FBDEV_FRONTEND + default y + help + This driver implements the front-end of the Xen virtual + keyboard and mouse device driver. It communicates with a back-end + in another domain. + comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 2ae87b19caa8..98c4f9a77876 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o + +obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c new file mode 100644 index 000000000000..0f47f4697cdf --- /dev/null +++ b/drivers/input/xen-kbdfront.c @@ -0,0 +1,340 @@ +/* + * Xen para-virtual input device + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + * + * Based on linux/drivers/input/mouse/sermouse.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables together with xen-fbfront.c. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/input.h> +#include <asm/xen/hypervisor.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/interface/io/fbif.h> +#include <xen/interface/io/kbdif.h> +#include <xen/xenbus.h> + +struct xenkbd_info { + struct input_dev *kbd; + struct input_dev *ptr; + struct xenkbd_page *page; + int irq; + struct xenbus_device *xbdev; + char phys[32]; +}; + +static int xenkbd_remove(struct xenbus_device *); +static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); +static void xenkbd_disconnect_backend(struct xenkbd_info *); + +/* + * Note: if you need to send out events, see xenfb_do_update() for how + * to do that. + */ + +static irqreturn_t input_handler(int rq, void *dev_id) +{ + struct xenkbd_info *info = dev_id; + struct xenkbd_page *page = info->page; + __u32 cons, prod; + + prod = page->in_prod; + if (prod == page->in_cons) + return IRQ_HANDLED; + rmb(); /* ensure we see ring contents up to prod */ + for (cons = page->in_cons; cons != prod; cons++) { + union xenkbd_in_event *event; + struct input_dev *dev; + event = &XENKBD_IN_RING_REF(page, cons); + + dev = info->ptr; + switch (event->type) { + case XENKBD_TYPE_MOTION: + input_report_rel(dev, REL_X, event->motion.rel_x); + input_report_rel(dev, REL_Y, event->motion.rel_y); + break; + case XENKBD_TYPE_KEY: + dev = NULL; + if (test_bit(event->key.keycode, info->kbd->keybit)) + dev = info->kbd; + if (test_bit(event->key.keycode, info->ptr->keybit)) + dev = info->ptr; + if (dev) + input_report_key(dev, event->key.keycode, + event->key.pressed); + else + printk(KERN_WARNING + "xenkbd: unhandled keycode 0x%x\n", + event->key.keycode); + break; + case XENKBD_TYPE_POS: + input_report_abs(dev, ABS_X, event->pos.abs_x); + input_report_abs(dev, ABS_Y, event->pos.abs_y); + break; + } + if (dev) + input_sync(dev); + } + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_irq(info->irq); + + return IRQ_HANDLED; +} + +static int __devinit xenkbd_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, i; + struct xenkbd_info *info; + struct input_dev *kbd, *ptr; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + dev->dev.driver_data = info; + info->xbdev = dev; + info->irq = -1; + snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename); + + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->page) + goto error_nomem; + + /* keyboard */ + kbd = input_allocate_device(); + if (!kbd) + goto error_nomem; + kbd->name = "Xen Virtual Keyboard"; + kbd->phys = info->phys; + kbd->id.bustype = BUS_PCI; + kbd->id.vendor = 0x5853; + kbd->id.product = 0xffff; + kbd->evbit[0] = BIT(EV_KEY); + for (i = KEY_ESC; i < KEY_UNKNOWN; i++) + set_bit(i, kbd->keybit); + for (i = KEY_OK; i < KEY_MAX; i++) + set_bit(i, kbd->keybit); + + ret = input_register_device(kbd); + if (ret) { + input_free_device(kbd); + xenbus_dev_fatal(dev, ret, "input_register_device(kbd)"); + goto error; + } + info->kbd = kbd; + + /* pointing device */ + ptr = input_allocate_device(); + if (!ptr) + goto error_nomem; + ptr->name = "Xen Virtual Pointer"; + ptr->phys = info->phys; + ptr->id.bustype = BUS_PCI; + ptr->id.vendor = 0x5853; + ptr->id.product = 0xfffe; + ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); + for (i = BTN_LEFT; i <= BTN_TASK; i++) + set_bit(i, ptr->keybit); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); + + ret = input_register_device(ptr); + if (ret) { + input_free_device(ptr); + xenbus_dev_fatal(dev, ret, "input_register_device(ptr)"); + goto error; + } + info->ptr = ptr; + + ret = xenkbd_connect_backend(dev, info); + if (ret < 0) + goto error; + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenkbd_remove(dev); + return ret; +} + +static int xenkbd_resume(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev->dev.driver_data; + + xenkbd_disconnect_backend(info); + memset(info->page, 0, PAGE_SIZE); + return xenkbd_connect_backend(dev, info); +} + +static int xenkbd_remove(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev->dev.driver_data; + + xenkbd_disconnect_backend(info); + if (info->kbd) + input_unregister_device(info->kbd); + if (info->ptr) + input_unregister_device(info->ptr); + free_page((unsigned long)info->page); + kfree(info); + return 0; +} + +static int xenkbd_connect_backend(struct xenbus_device *dev, + struct xenkbd_info *info) +{ + int ret, evtchn; + struct xenbus_transaction xbt; + + ret = xenbus_alloc_evtchn(dev, &evtchn); + if (ret) + return ret; + ret = bind_evtchn_to_irqhandler(evtchn, input_handler, + 0, dev->devicetype, info); + if (ret < 0) { + xenbus_free_evtchn(dev, evtchn); + xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + evtchn); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenkbd_disconnect_backend(struct xenkbd_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenkbd_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenkbd_info *info = dev->dev.driver_data; + int ret, val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: +InitWait: + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-abs-pointer", "%d", &val); + if (ret < 0) + val = 0; + if (val) { + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, + "request-abs-pointer", "1"); + if (ret) + printk(KERN_WARNING + "xenkbd: can't request abs-pointer"); + } + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. + */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_device_id xenkbd_ids[] = { + { "vkbd" }, + { "" } +}; + +static struct xenbus_driver xenkbd = { + .name = "vkbd", + .owner = THIS_MODULE, + .ids = xenkbd_ids, + .probe = xenkbd_probe, + .remove = xenkbd_remove, + .resume = xenkbd_resume, + .otherend_changed = xenkbd_backend_changed, +}; + +static int __init xenkbd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. */ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenkbd); +} + +static void __exit xenkbd_cleanup(void) +{ + xenbus_unregister_driver(&xenkbd); +} + +module_init(xenkbd_init); +module_exit(xenkbd_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7483d45bc5bc..e62018a36133 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1809,3 +1809,5 @@ module_exit(netif_exit); MODULE_DESCRIPTION("Xen virtual network device frontend"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:vif"); +MODULE_ALIAS("xennet"); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 1bd5fb30237d..e3dc8f8d0c3e 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1930,6 +1930,20 @@ config FB_VIRTUAL If unsure, say N. +config XEN_FBDEV_FRONTEND + tristate "Xen virtual frame buffer support" + depends on FB && XEN + select FB_SYS_FILLRECT + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT + select FB_SYS_FOPS + select FB_DEFERRED_IO + default y + help + This driver implements the front-end of the Xen virtual + frame buffer driver. It communicates with a back-end + in another domain. + source "drivers/video/omap/Kconfig" source "drivers/video/backlight/Kconfig" diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 11c0e5e05f21..f172b9b73314 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o obj-$(CONFIG_FB_SM501) += sm501fb.o obj-$(CONFIG_FB_XILINX) += xilinxfb.o obj-$(CONFIG_FB_OMAP) += omap/ +obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o # Platform or fallback drivers go here obj-$(CONFIG_FB_UVESA) += uvesafb.o diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c new file mode 100644 index 000000000000..619a6f8d65a2 --- /dev/null +++ b/drivers/video/xen-fbfront.c @@ -0,0 +1,550 @@ +/* + * Xen para-virtual frame buffer device + * + * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + * + * Based on linux/drivers/video/q40fb.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables when they become capable of dealing with the + * frame buffer. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fb.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/xen/hypervisor.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/interface/io/fbif.h> +#include <xen/interface/io/protocols.h> +#include <xen/xenbus.h> + +struct xenfb_info { + unsigned char *fb; + struct fb_info *fb_info; + int x1, y1, x2, y2; /* dirty rectangle, + protected by dirty_lock */ + spinlock_t dirty_lock; + int nr_pages; + int irq; + struct xenfb_page *page; + unsigned long *mfns; + int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + + struct xenbus_device *xbdev; +}; + +static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; + +static int xenfb_remove(struct xenbus_device *); +static void xenfb_init_shared_page(struct xenfb_info *); +static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); +static void xenfb_disconnect_backend(struct xenfb_info *); + +static void xenfb_do_update(struct xenfb_info *info, + int x, int y, int w, int h) +{ + union xenfb_out_event event; + u32 prod; + + event.type = XENFB_TYPE_UPDATE; + event.update.x = x; + event.update.y = y; + event.update.width = w; + event.update.height = h; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + +static int xenfb_queue_full(struct xenfb_info *info) +{ + u32 cons, prod; + + prod = info->page->out_prod; + cons = info->page->out_cons; + return prod - cons == XENFB_OUT_RING_LEN; +} + +static void xenfb_refresh(struct xenfb_info *info, + int x1, int y1, int w, int h) +{ + unsigned long flags; + int y2 = y1 + h - 1; + int x2 = x1 + w - 1; + + if (!info->update_wanted) + return; + + spin_lock_irqsave(&info->dirty_lock, flags); + + /* Combine with dirty rectangle: */ + if (info->y1 < y1) + y1 = info->y1; + if (info->y2 > y2) + y2 = info->y2; + if (info->x1 < x1) + x1 = info->x1; + if (info->x2 > x2) + x2 = info->x2; + + if (xenfb_queue_full(info)) { + /* Can't send right now, stash it in the dirty rectangle */ + info->x1 = x1; + info->x2 = x2; + info->y1 = y1; + info->y2 = y2; + spin_unlock_irqrestore(&info->dirty_lock, flags); + return; + } + + /* Clear dirty rectangle: */ + info->x1 = info->y1 = INT_MAX; + info->x2 = info->y2 = 0; + + spin_unlock_irqrestore(&info->dirty_lock, flags); + + if (x1 <= x2 && y1 <= y2) + xenfb_do_update(info, x1, y1, x2 - x1 + 1, y2 - y1 + 1); +} + +static void xenfb_deferred_io(struct fb_info *fb_info, + struct list_head *pagelist) +{ + struct xenfb_info *info = fb_info->par; + struct page *page; + unsigned long beg, end; + int y1, y2, miny, maxy; + + miny = INT_MAX; + maxy = 0; + list_for_each_entry(page, pagelist, lru) { + beg = page->index << PAGE_SHIFT; + end = beg + PAGE_SIZE - 1; + y1 = beg / fb_info->fix.line_length; + y2 = end / fb_info->fix.line_length; + if (y2 >= fb_info->var.yres) + y2 = fb_info->var.yres - 1; + if (miny > y1) + miny = y1; + if (maxy < y2) + maxy = y2; + } + xenfb_refresh(info, 0, miny, fb_info->var.xres, maxy - miny + 1); +} + +static struct fb_deferred_io xenfb_defio = { + .delay = HZ / 20, + .deferred_io = xenfb_deferred_io, +}; + +static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green, + unsigned blue, unsigned transp, + struct fb_info *info) +{ + u32 v; + + if (regno > info->cmap.len) + return 1; + +#define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16) + red = CNVT_TOHW(red, info->var.red.length); + green = CNVT_TOHW(green, info->var.green.length); + blue = CNVT_TOHW(blue, info->var.blue.length); + transp = CNVT_TOHW(transp, info->var.transp.length); +#undef CNVT_TOHW + + v = (red << info->var.red.offset) | + (green << info->var.green.offset) | + (blue << info->var.blue.offset); + + switch (info->var.bits_per_pixel) { + case 16: + case 24: + case 32: + ((u32 *)info->pseudo_palette)[regno] = v; + break; + } + + return 0; +} + +static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect) +{ + struct xenfb_info *info = p->par; + + sys_fillrect(p, rect); + xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height); +} + +static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image) +{ + struct xenfb_info *info = p->par; + + sys_imageblit(p, image); + xenfb_refresh(info, image->dx, image->dy, image->width, image->height); +} + +static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) +{ + struct xenfb_info *info = p->par; + + sys_copyarea(p, area); + xenfb_refresh(info, area->dx, area->dy, area->width, area->height); +} + +static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct xenfb_info *info = p->par; + ssize_t res; + + res = fb_sys_write(p, buf, count, ppos); + xenfb_refresh(info, 0, 0, info->page->width, info->page->height); + return res; +} + +static struct fb_ops xenfb_fb_ops = { + .owner = THIS_MODULE, + .fb_read = fb_sys_read, + .fb_write = xenfb_write, + .fb_setcolreg = xenfb_setcolreg, + .fb_fillrect = xenfb_fillrect, + .fb_copyarea = xenfb_copyarea, + .fb_imageblit = xenfb_imageblit, +}; + +static irqreturn_t xenfb_event_handler(int rq, void *dev_id) +{ + /* + * No in events recognized, simply ignore them all. + * If you need to recognize some, see xen-kbdfront's + * input_handler() for how to do that. + */ + struct xenfb_info *info = dev_id; + struct xenfb_page *page = info->page; + + if (page->in_cons != page->in_prod) { + info->page->in_cons = info->page->in_prod; + notify_remote_via_irq(info->irq); + } + + /* Flush dirty rectangle: */ + xenfb_refresh(info, INT_MAX, INT_MAX, -INT_MAX, -INT_MAX); + + return IRQ_HANDLED; +} + +static int __devinit xenfb_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct xenfb_info *info; + struct fb_info *fb_info; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + dev->dev.driver_data = info; + info->xbdev = dev; + info->irq = -1; + info->x1 = info->y1 = INT_MAX; + spin_lock_init(&info->dirty_lock); + + info->fb = vmalloc(xenfb_mem_len); + if (info->fb == NULL) + goto error_nomem; + memset(info->fb, 0, xenfb_mem_len); + + info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); + if (!info->mfns) + goto error_nomem; + + /* set up shared page */ + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->page) + goto error_nomem; + + xenfb_init_shared_page(info); + + /* abusing framebuffer_alloc() to allocate pseudo_palette */ + fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); + if (fb_info == NULL) + goto error_nomem; + + /* complete the abuse: */ + fb_info->pseudo_palette = fb_info->par; + fb_info->par = info; + + fb_info->screen_base = info->fb; + + fb_info->fbops = &xenfb_fb_ops; + fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; + fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; + fb_info->var.bits_per_pixel = info->page->depth; + + fb_info->var.red = (struct fb_bitfield){16, 8, 0}; + fb_info->var.green = (struct fb_bitfield){8, 8, 0}; + fb_info->var.blue = (struct fb_bitfield){0, 8, 0}; + + fb_info->var.activate = FB_ACTIVATE_NOW; + fb_info->var.height = -1; + fb_info->var.width = -1; + fb_info->var.vmode = FB_VMODE_NONINTERLACED; + + fb_info->fix.visual = FB_VISUAL_TRUECOLOR; + fb_info->fix.line_length = info->page->line_length; + fb_info->fix.smem_start = 0; + fb_info->fix.smem_len = xenfb_mem_len; + strcpy(fb_info->fix.id, "xen"); + fb_info->fix.type = FB_TYPE_PACKED_PIXELS; + fb_info->fix.accel = FB_ACCEL_NONE; + + fb_info->flags = FBINFO_FLAG_DEFAULT; + + ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); + if (ret < 0) { + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "fb_alloc_cmap"); + goto error; + } + + fb_info->fbdefio = &xenfb_defio; + fb_deferred_io_init(fb_info); + + ret = register_framebuffer(fb_info); + if (ret) { + fb_deferred_io_cleanup(fb_info); + fb_dealloc_cmap(&fb_info->cmap); + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "register_framebuffer"); + goto error; + } + info->fb_info = fb_info; + + ret = xenfb_connect_backend(dev, info); + if (ret < 0) + goto error; + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenfb_remove(dev); + return ret; +} + +static int xenfb_resume(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev->dev.driver_data; + + xenfb_disconnect_backend(info); + xenfb_init_shared_page(info); + return xenfb_connect_backend(dev, info); +} + +static int xenfb_remove(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev->dev.driver_data; + + xenfb_disconnect_backend(info); + if (info->fb_info) { + fb_deferred_io_cleanup(info->fb_info); + unregister_framebuffer(info->fb_info); + fb_dealloc_cmap(&info->fb_info->cmap); + framebuffer_release(info->fb_info); + } + free_page((unsigned long)info->page); + vfree(info->mfns); + vfree(info->fb); + kfree(info); + + return 0; +} + +static unsigned long vmalloc_to_mfn(void *address) +{ + return pfn_to_mfn(vmalloc_to_pfn(address)); +} + +static void xenfb_init_shared_page(struct xenfb_info *info) +{ + int i; + + for (i = 0; i < info->nr_pages; i++) + info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); + + info->page->pd[0] = vmalloc_to_mfn(info->mfns); + info->page->pd[1] = 0; + info->page->width = XENFB_WIDTH; + info->page->height = XENFB_HEIGHT; + info->page->depth = XENFB_DEPTH; + info->page->line_length = (info->page->depth / 8) * info->page->width; + info->page->mem_length = xenfb_mem_len; + info->page->in_cons = info->page->in_prod = 0; + info->page->out_cons = info->page->out_prod = 0; +} + +static int xenfb_connect_backend(struct xenbus_device *dev, + struct xenfb_info *info) +{ + int ret, evtchn; + struct xenbus_transaction xbt; + + ret = xenbus_alloc_evtchn(dev, &evtchn); + if (ret) + return ret; + ret = bind_evtchn_to_irqhandler(evtchn, xenfb_event_handler, + 0, dev->devicetype, info); + if (ret < 0) { + xenbus_free_evtchn(dev, evtchn); + xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + evtchn); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenfb_disconnect_backend(struct xenfb_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenfb_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenfb_info *info = dev->dev.driver_data; + int val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: +InitWait: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. + */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "request-update", "%d", &val) < 0) + val = 0; + if (val) + info->update_wanted = 1; + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_device_id xenfb_ids[] = { + { "vfb" }, + { "" } +}; + +static struct xenbus_driver xenfb = { + .name = "vfb", + .owner = THIS_MODULE, + .ids = xenfb_ids, + .probe = xenfb_probe, + .remove = xenfb_remove, + .resume = xenfb_resume, + .otherend_changed = xenfb_backend_changed, +}; + +static int __init xenfb_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. */ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenfb); +} + +static void __exit xenfb_cleanup(void) +{ + xenbus_unregister_driver(&xenfb); +} + +module_init(xenfb_init); +module_exit(xenfb_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig new file mode 100644 index 000000000000..4b75a16de009 --- /dev/null +++ b/drivers/xen/Kconfig @@ -0,0 +1,19 @@ +config XEN_BALLOON + bool "Xen memory balloon driver" + depends on XEN + default y + help + The balloon driver allows the Xen domain to request more memory from + the system to expand the domain's memory allocation, or alternatively + return unneeded memory to the system. + +config XEN_SCRUB_PAGES + bool "Scrub pages before returning them to system" + depends on XEN_BALLOON + default y + help + Scrub pages before returning them to the system for reuse by + other domains. This makes sure that any confidential data + is not accidentally visible to other domains. Is it more + secure, but slightly less efficient. + If in doubt, say yes. diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 56592f0d6cef..37af04f1ffd9 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,2 +1,4 @@ -obj-y += grant-table.o +obj-y += grant-table.o features.o events.o obj-y += xenbus/ +obj-$(CONFIG_XEN_XENCOMM) += xencomm.o +obj-$(CONFIG_XEN_BALLOON) += balloon.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c new file mode 100644 index 000000000000..ab25ba6cbbb9 --- /dev/null +++ b/drivers/xen/balloon.c @@ -0,0 +1,712 @@ +/****************************************************************************** + * balloon.c + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser + * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/mutex.h> +#include <linux/highmem.h> +#include <linux/list.h> +#include <linux/sysdev.h> + +#include <asm/xen/hypervisor.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> + +#include <xen/interface/memory.h> +#include <xen/balloon.h> +#include <xen/xenbus.h> +#include <xen/features.h> +#include <xen/page.h> + +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) + +#define BALLOON_CLASS_NAME "memory" + +struct balloon_stats { + /* We aim for 'current allocation' == 'target allocation'. */ + unsigned long current_pages; + unsigned long target_pages; + /* We may hit the hard limit in Xen. If we do then we remember it. */ + unsigned long hard_limit; + /* + * Drivers may alter the memory reservation independently, but they + * must inform the balloon driver so we avoid hitting the hard limit. + */ + unsigned long driver_pages; + /* Number of pages in high- and low-memory balloons. */ + unsigned long balloon_low; + unsigned long balloon_high; +}; + +static DEFINE_MUTEX(balloon_mutex); + +static struct sys_device balloon_sysdev; + +static int register_balloon(struct sys_device *sysdev); + +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. + */ +static DEFINE_SPINLOCK(balloon_lock); + +static struct balloon_stats balloon_stats; + +/* We increase/decrease in batches which fit in a page */ +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; + +/* VM /proc information for memory */ +extern unsigned long totalram_pages; + +#ifdef CONFIG_HIGHMEM +extern unsigned long totalhigh_pages; +#define inc_totalhigh_pages() (totalhigh_pages++) +#define dec_totalhigh_pages() (totalhigh_pages--) +#else +#define inc_totalhigh_pages() do {} while(0) +#define dec_totalhigh_pages() do {} while(0) +#endif + +/* List of ballooned pages, threaded through the mem_map array. */ +static LIST_HEAD(ballooned_pages); + +/* Main work function, always executed in process context. */ +static void balloon_process(struct work_struct *work); +static DECLARE_WORK(balloon_worker, balloon_process); +static struct timer_list balloon_timer; + +/* When ballooning out (allocating memory to return to Xen) we don't really + want the kernel to try too hard since that can trigger the oom killer. */ +#define GFP_BALLOON \ + (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) + +static void scrub_page(struct page *page) +{ +#ifdef CONFIG_XEN_SCRUB_PAGES + if (PageHighMem(page)) { + void *v = kmap(page); + clear_page(v); + kunmap(v); + } else { + void *v = page_address(page); + clear_page(v); + } +#endif +} + +/* balloon_append: add the given page to the balloon. */ +static void balloon_append(struct page *page) +{ + /* Lowmem is re-populated first, so highmem pages go at list tail. */ + if (PageHighMem(page)) { + list_add_tail(&page->lru, &ballooned_pages); + balloon_stats.balloon_high++; + dec_totalhigh_pages(); + } else { + list_add(&page->lru, &ballooned_pages); + balloon_stats.balloon_low++; + } +} + +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ +static struct page *balloon_retrieve(void) +{ + struct page *page; + + if (list_empty(&ballooned_pages)) + return NULL; + + page = list_entry(ballooned_pages.next, struct page, lru); + list_del(&page->lru); + + if (PageHighMem(page)) { + balloon_stats.balloon_high--; + inc_totalhigh_pages(); + } + else + balloon_stats.balloon_low--; + + return page; +} + +static struct page *balloon_first_page(void) +{ + if (list_empty(&ballooned_pages)) + return NULL; + return list_entry(ballooned_pages.next, struct page, lru); +} + +static struct page *balloon_next_page(struct page *page) +{ + struct list_head *next = page->lru.next; + if (next == &ballooned_pages) + return NULL; + return list_entry(next, struct page, lru); +} + +static void balloon_alarm(unsigned long unused) +{ + schedule_work(&balloon_worker); +} + +static unsigned long current_target(void) +{ + unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); + + target = min(target, + balloon_stats.current_pages + + balloon_stats.balloon_low + + balloon_stats.balloon_high); + + return target; +} + +static int increase_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i, flags; + struct page *page; + long rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + spin_lock_irqsave(&balloon_lock, flags); + + page = balloon_first_page(); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page == NULL); + frame_list[i] = page_to_pfn(page);; + page = balloon_next_page(page); + } + + reservation.extent_start = (unsigned long)frame_list; + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op( + XENMEM_populate_physmap, &reservation); + if (rc < nr_pages) { + if (rc > 0) { + int ret; + + /* We hit the Xen hard limit: reprobe. */ + reservation.nr_extents = rc; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + BUG_ON(ret != rc); + } + if (rc >= 0) + balloon_stats.hard_limit = (balloon_stats.current_pages + rc - + balloon_stats.driver_pages); + goto out; + } + + for (i = 0; i < nr_pages; i++) { + page = balloon_retrieve(); + BUG_ON(page == NULL); + + pfn = page_to_pfn(page); + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && + phys_to_machine_mapping_valid(pfn)); + + set_phys_to_machine(pfn, frame_list[i]); + + /* Link back into the page tables if not highmem. */ + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(frame_list[i], PAGE_KERNEL), + 0); + BUG_ON(ret); + } + + /* Relinquish the page back to the allocator. */ + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + } + + balloon_stats.current_pages += nr_pages; + totalram_pages = balloon_stats.current_pages; + + out: + spin_unlock_irqrestore(&balloon_lock, flags); + + return 0; +} + +static int decrease_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i, flags; + struct page *page; + int need_sleep = 0; + int ret; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + for (i = 0; i < nr_pages; i++) { + if ((page = alloc_page(GFP_BALLOON)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + + pfn = page_to_pfn(page); + frame_list[i] = pfn_to_mfn(pfn); + + scrub_page(page); + } + + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); + + spin_lock_irqsave(&balloon_lock, flags); + + /* No more mappings: invalidate P2M and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = mfn_to_pfn(frame_list[i]); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + balloon_append(pfn_to_page(pfn)); + } + + reservation.extent_start = (unsigned long)frame_list; + reservation.nr_extents = nr_pages; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); + + balloon_stats.current_pages -= nr_pages; + totalram_pages = balloon_stats.current_pages; + + spin_unlock_irqrestore(&balloon_lock, flags); + + return need_sleep; +} + +/* + * We avoid multiple worker processes conflicting via the balloon mutex. + * We may of course race updates of the target counts (which are protected + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +static void balloon_process(struct work_struct *work) +{ + int need_sleep = 0; + long credit; + + mutex_lock(&balloon_mutex); + + do { + credit = current_target() - balloon_stats.current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + +#ifndef CONFIG_PREEMPT + if (need_resched()) + schedule(); +#endif + } while ((credit != 0) && !need_sleep); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != balloon_stats.current_pages) + mod_timer(&balloon_timer, jiffies + HZ); + + mutex_unlock(&balloon_mutex); +} + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +void balloon_set_new_target(unsigned long target) +{ + /* No need for lock. Not read-modify-write updates. */ + balloon_stats.hard_limit = ~0UL; + balloon_stats.target_pages = target; + schedule_work(&balloon_worker); +} + +static struct xenbus_watch target_watch = +{ + .node = "memory/target" +}; + +/* React to a change in the target key */ +static void watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + unsigned long long new_target; + int err; + + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); + if (err != 1) { + /* This is ok (for domain0 at least) - so just return */ + return; + } + + /* The given memory/target value is in KiB, so it needs converting to + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. + */ + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); +} + +static int balloon_init_watcher(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + int err; + + err = register_xenbus_watch(&target_watch); + if (err) + printk(KERN_ERR "Failed to set balloon watcher\n"); + + return NOTIFY_DONE; +} + +static struct notifier_block xenstore_notifier; + +static int __init balloon_init(void) +{ + unsigned long pfn; + struct page *page; + + if (!is_running_on_xen()) + return -ENODEV; + + pr_info("xen_balloon: Initialising balloon driver.\n"); + + balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); + totalram_pages = balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages; + balloon_stats.balloon_low = 0; + balloon_stats.balloon_high = 0; + balloon_stats.driver_pages = 0UL; + balloon_stats.hard_limit = ~0UL; + + init_timer(&balloon_timer); + balloon_timer.data = 0; + balloon_timer.function = balloon_alarm; + + register_balloon(&balloon_sysdev); + + /* Initialise the balloon with excess memory space. */ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + if (!PageReserved(page)) + balloon_append(page); + } + + target_watch.callback = watch_target; + xenstore_notifier.notifier_call = balloon_init_watcher; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(balloon_init); + +static void balloon_exit(void) +{ + /* XXX - release balloon here */ + return; +} + +module_exit(balloon_exit); + +static void balloon_update_driver_allowance(long delta) +{ + unsigned long flags; + + spin_lock_irqsave(&balloon_lock, flags); + balloon_stats.driver_pages += delta; + spin_unlock_irqrestore(&balloon_lock, flags); +} + +static int dealloc_pte_fn( + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) +{ + unsigned long mfn = pte_mfn(*pte); + int ret; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + reservation.extent_start = (unsigned long)&mfn; + set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); + return 0; +} + +static struct page **alloc_empty_pages_and_pagevec(int nr_pages) +{ + unsigned long vaddr, flags; + struct page *page, **pagevec; + int i, ret; + + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); + if (pagevec == NULL) + return NULL; + + for (i = 0; i < nr_pages; i++) { + page = pagevec[i] = alloc_page(GFP_KERNEL); + if (page == NULL) + goto err; + + vaddr = (unsigned long)page_address(page); + + scrub_page(page); + + spin_lock_irqsave(&balloon_lock, flags); + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long gmfn = page_to_pfn(page); + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + reservation.extent_start = (unsigned long)&gmfn; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + if (ret == 1) + ret = 0; /* success */ + } else { + ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, + dealloc_pte_fn, NULL); + } + + if (ret != 0) { + spin_unlock_irqrestore(&balloon_lock, flags); + __free_page(page); + goto err; + } + + totalram_pages = --balloon_stats.current_pages; + + spin_unlock_irqrestore(&balloon_lock, flags); + } + + out: + schedule_work(&balloon_worker); + flush_tlb_all(); + return pagevec; + + err: + spin_lock_irqsave(&balloon_lock, flags); + while (--i >= 0) + balloon_append(pagevec[i]); + spin_unlock_irqrestore(&balloon_lock, flags); + kfree(pagevec); + pagevec = NULL; + goto out; +} + +static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) +{ + unsigned long flags; + int i; + + if (pagevec == NULL) + return; + + spin_lock_irqsave(&balloon_lock, flags); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page_count(pagevec[i]) != 1); + balloon_append(pagevec[i]); + } + spin_unlock_irqrestore(&balloon_lock, flags); + + kfree(pagevec); + + schedule_work(&balloon_worker); +} + +static void balloon_release_driver_page(struct page *page) +{ + unsigned long flags; + + spin_lock_irqsave(&balloon_lock, flags); + balloon_append(page); + balloon_stats.driver_pages--; + spin_unlock_irqrestore(&balloon_lock, flags); + + schedule_work(&balloon_worker); +} + + +#define BALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); +BALLOON_SHOW(hard_limit_kb, + (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", + (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); + +static ssize_t show_target_kb(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); +} + +static ssize_t store_target_kb(struct sys_device *dev, + const char *buf, + size_t count) +{ + char memstring[64], *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + if (count > sizeof(memstring)) + return -EFBIG; /* too long */ + strcpy(memstring, buf); + + target_bytes = memparse(memstring, &endchar); + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +} + +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, + show_target_kb, store_target_kb); + +static struct sysdev_attribute *balloon_attrs[] = { + &attr_target_kb, +}; + +static struct attribute *balloon_info_attrs[] = { + &attr_current_kb.attr, + &attr_low_kb.attr, + &attr_high_kb.attr, + &attr_hard_limit_kb.attr, + &attr_driver_kb.attr, + NULL +}; + +static struct attribute_group balloon_info_group = { + .name = "info", + .attrs = balloon_info_attrs, +}; + +static struct sysdev_class balloon_sysdev_class = { + .name = BALLOON_CLASS_NAME, +}; + +static int register_balloon(struct sys_device *sysdev) +{ + int i, error; + + error = sysdev_class_register(&balloon_sysdev_class); + if (error) + return error; + + sysdev->id = 0; + sysdev->cls = &balloon_sysdev_class; + + error = sysdev_register(sysdev); + if (error) { + sysdev_class_unregister(&balloon_sysdev_class); + return error; + } + + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { + error = sysdev_create_file(sysdev, balloon_attrs[i]); + if (error) + goto fail; + } + + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); + if (error) + goto fail; + + return 0; + + fail: + while (--i >= 0) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); + return error; +} + +static void unregister_balloon(struct sys_device *sysdev) +{ + int i; + + sysfs_remove_group(&sysdev->kobj, &balloon_info_group); + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); +} + +static void balloon_sysfs_exit(void) +{ + unregister_balloon(&balloon_sysdev); +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c new file mode 100644 index 000000000000..4f0f22b020ea --- /dev/null +++ b/drivers/xen/events.c @@ -0,0 +1,674 @@ +/* + * Xen event channels + * + * Xen models interrupts with abstract event channels. Because each + * domain gets 1024 event channels, but NR_IRQ is not that large, we + * must dynamically map irqs<->event channels. The event channels + * interface with the rest of the kernel by defining a xen interrupt + * chip. When an event is recieved, it is mapped to an irq and sent + * through the normal interrupt processing path. + * + * There are four kinds of events which can be mapped to an event + * channel: + * + * 1. Inter-domain notifications. This includes all the virtual + * device events, since they're driven by front-ends in another domain + * (typically dom0). + * 2. VIRQs, typically used for timers. These are per-cpu events. + * 3. IPIs. + * 4. Hardware interrupts. Not supported at present. + * + * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 + */ + +#include <linux/linkage.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <asm/ptrace.h> +#include <asm/irq.h> +#include <asm/sync_bitops.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen-ops.h> +#include <xen/events.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> + +/* + * This lock protects updates to the following mapping and reference-count + * arrays. The lock does not need to be acquired to read the mapping tables. + */ +static DEFINE_SPINLOCK(irq_mapping_update_lock); + +/* IRQ <-> VIRQ mapping. */ +static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; + +/* IRQ <-> IPI mapping */ +static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; + +/* Packed IRQ information: binding type, sub-type index, and event channel. */ +struct packed_irq +{ + unsigned short evtchn; + unsigned char index; + unsigned char type; +}; + +static struct packed_irq irq_info[NR_IRQS]; + +/* Binding types. */ +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN +}; + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) + +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 +}; +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +/* Xen will never allocate port zero for any purpose. */ +#define VALID_EVTCHN(chn) ((chn) != 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} +EXPORT_SYMBOL_GPL(force_evtchn_callback); + +static struct irq_chip xen_dynamic_chip; + +/* Constructor for packed IRQ information. */ +static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +{ + return (struct packed_irq) { evtchn, index, type }; +} + +/* + * Accessors for packed IRQ information. + */ +static inline unsigned int evtchn_from_irq(int irq) +{ + return irq_info[irq].evtchn; +} + +static inline unsigned int index_from_irq(int irq) +{ + return irq_info[irq].index; +} + +static inline unsigned int type_from_irq(int irq) +{ + return irq_info[irq].type; +} + +static inline unsigned long active_evtchns(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return (sh->evtchn_pending[idx] & + cpu_evtchn_mask[cpu][idx] & + ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + int irq = evtchn_to_irq[chn]; + + BUG_ON(irq == -1); +#ifdef CONFIG_SMP + irq_desc[irq].affinity = cpumask_of_cpu(cpu); +#endif + + __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); + __set_bit(chn, cpu_evtchn_mask[cpu]); + + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ +#ifdef CONFIG_SMP + int i; + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) + irq_desc[i].affinity = cpumask_of_cpu(0); +#endif + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return cpu_evtchn[evtchn]; +} + +static inline void clear_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void set_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_pending[0]); +} + + +/** + * notify_remote_via_irq - send event to remote end of event channel via irq + * @irq: irq of event channel to send event to + * + * Unlike notify_remote_via_evtchn(), this is safe to use across + * save/restore. Notifications on a broken connection are silently + * dropped. + */ +void notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +EXPORT_SYMBOL_GPL(notify_remote_via_irq); + +static void mask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_mask[0]); +} + +static void unmask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + unsigned int cpu = get_cpu(); + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + + sync_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. + */ + if (sync_test_bit(port, &s->evtchn_pending[0]) && + !sync_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + +static int find_unbound_irq(void) +{ + int irq; + + /* Only allocate from dynirq range */ + for (irq = 0; irq < NR_IRQS; irq++) + if (irq_bindcount[irq] == 0) + break; + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +int bind_evtchn_to_irq(unsigned int evtchn) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + + if (irq == -1) { + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "event"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); + +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { + irq = find_unbound_irq(); + if (irq < 0) + goto out; + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + + +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(virq_to_irq, cpu)[virq]; + + if (irq == -1) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +static void unbind_from_irq(unsigned int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. */ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + + dynamic_irq_init(irq); + } + + spin_unlock(&irq_mapping_update_lock); +} + +int bind_evtchn_to_irqhandler(unsigned int evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_evtchn_to_irq(evtchn); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + +int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_virq_to_irq(virq, cpu); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + +int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_ipi_to_irq(ipi, cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + +void unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); + +void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) +{ + int irq = per_cpu(ipi_to_irq, cpu)[vector]; + BUG_ON(irq < 0); + notify_remote_via_irq(irq); +} + +irqreturn_t xen_debug_interrupt(int irq, void *dev_id) +{ + struct shared_info *sh = HYPERVISOR_shared_info; + int cpu = smp_processor_id(); + int i; + unsigned long flags; + static DEFINE_SPINLOCK(debug_lock); + + spin_lock_irqsave(&debug_lock, flags); + + printk("vcpu %d\n ", cpu); + + for_each_online_cpu(i) { + struct vcpu_info *v = per_cpu(xen_vcpu, i); + printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, + (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask, + v->evtchn_upcall_pending, + v->evtchn_pending_sel); + } + printk("pending:\n "); + for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_pending[i], + i % 8 == 0 ? "\n " : " "); + printk("\nmasks:\n "); + for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nunmasked:\n "); + for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\npending list:\n"); + for(i = 0; i < NR_EVENT_CHANNELS; i++) { + if (sync_test_bit(i, sh->evtchn_pending)) { + printk(" %d: event %d -> irq %d\n", + cpu_evtchn[i], i, + evtchn_to_irq[i]); + } + } + + spin_unlock_irqrestore(&debug_lock, flags); + + return IRQ_HANDLED; +} + + +/* + * Search the CPUs pending events bitmasks. For each one found, map + * the event number to an irq, and feed it into do_IRQ() for + * handling. + * + * Xen uses a two-level bitmap to speed searching. The first level is + * a bitset of words which contain pending event bits. The second + * level is a bitset of pending events themselves. + */ +void xen_evtchn_do_upcall(struct pt_regs *regs) +{ + int cpu = get_cpu(); + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + static DEFINE_PER_CPU(unsigned, nesting_count); + unsigned count; + + do { + unsigned long pending_words; + + vcpu_info->evtchn_upcall_pending = 0; + + if (__get_cpu_var(nesting_count)++) + goto out; + +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + rmb(); +#endif + pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (pending_words != 0) { + unsigned long pending_bits; + int word_idx = __ffs(pending_words); + pending_words &= ~(1UL << word_idx); + + while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { + int bit_idx = __ffs(pending_bits); + int port = (word_idx * BITS_PER_LONG) + bit_idx; + int irq = evtchn_to_irq[port]; + + if (irq != -1) + xen_do_IRQ(irq, regs); + } + } + + BUG_ON(!irqs_disabled()); + + count = __get_cpu_var(nesting_count); + __get_cpu_var(nesting_count) = 0; + } while(count != 1); + +out: + put_cpu(); +} + +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + struct evtchn_bind_vcpu bind_vcpu; + int evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return; + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. Ignore + * it, but don't do the xenlinux-level rebind in that case. + */ + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); +} + + +static void set_affinity_irq(unsigned irq, cpumask_t dest) +{ + unsigned tcpu = first_cpu(dest); + rebind_irq_to_cpu(irq, tcpu); +} + +int resend_irq_on_evtchn(unsigned int irq) +{ + int masked, evtchn = evtchn_from_irq(irq); + struct shared_info *s = HYPERVISOR_shared_info; + + if (!VALID_EVTCHN(evtchn)) + return 1; + + masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); + sync_set_bit(evtchn, s->evtchn_pending); + if (!masked) + unmask_evtchn(evtchn); + + return 1; +} + +static void enable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); +} + +static void disable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + mask_evtchn(evtchn); +} + +static void ack_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +static int retrigger_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + struct shared_info *sh = HYPERVISOR_shared_info; + int ret = 0; + + if (VALID_EVTCHN(evtchn)) { + int masked; + + masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); + sync_set_bit(evtchn, sh->evtchn_pending); + if (!masked) + unmask_evtchn(evtchn); + ret = 1; + } + + return ret; +} + +static struct irq_chip xen_dynamic_chip __read_mostly = { + .name = "xen-dyn", + .mask = disable_dynirq, + .unmask = enable_dynirq, + .ack = ack_dynirq, + .set_affinity = set_affinity_irq, + .retrigger = retrigger_dynirq, +}; + +void __init xen_init_IRQ(void) +{ + int i; + + init_evtchn_cpu_bindings(); + + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + for (i = 0; i < NR_IRQS; i++) + irq_bindcount[i] = 0; + + irq_ctx_init(smp_processor_id()); +} diff --git a/drivers/xen/features.c b/drivers/xen/features.c new file mode 100644 index 000000000000..0707714e40d6 --- /dev/null +++ b/drivers/xen/features.c @@ -0,0 +1,29 @@ +/****************************************************************************** + * features.c + * + * Xen feature flags. + * + * Copyright (c) 2006, Ian Campbell, XenSource Inc. + */ +#include <linux/types.h> +#include <linux/cache.h> +#include <linux/module.h> +#include <asm/xen/hypervisor.h> +#include <xen/features.h> + +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; +EXPORT_SYMBOL_GPL(xen_features); + +void xen_setup_features(void) +{ + struct xen_feature_info fi; + int i, j; + + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { + fi.submap_idx = i; + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) + break; + for (j = 0; j < 32; j++) + xen_features[i * 32 + j] = !!(fi.submap & 1<<j); + } +} diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index d85dc6d41c2a..52b6b41b909d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void) return xen_max; } -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - unsigned long **frames = (unsigned long **)data; - - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} - -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - - set_pte_at(&init_mm, addr, pte, __pte(0)); - return 0; -} - static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; @@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) setup.dom = DOMID_SELF; setup.nr_frames = nr_gframes; - setup.frame_list = frames; + set_xen_guest_handle(setup.frame_list, frames); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); if (rc == -ENOSYS) { @@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) BUG_ON(rc || setup.status); - if (shared == NULL) { - struct vm_struct *area; - area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); - BUG_ON(area == NULL); - shared = area->addr; - } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); + rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), + &shared); BUG_ON(rc); - frames -= nr_gframes; /* adjust after map_pte_fn() */ kfree(frames); @@ -506,10 +480,7 @@ static int gnttab_resume(void) static int gnttab_suspend(void) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_grant_frames, - unmap_pte_fn, NULL); - + arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 9fd2f70ab46d..0f86b0ff7879 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) *vaddr = NULL; - area = alloc_vm_area(PAGE_SIZE); + area = xen_alloc_vm_area(PAGE_SIZE); if (!area) return -ENOMEM; @@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) BUG(); if (op.status != GNTST_okay) { - free_vm_area(area); + xen_free_vm_area(area); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); @@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) BUG(); if (op.status == GNTST_okay) - free_vm_area(area); + xen_free_vm_area(area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4750de316ad3..57ceb5346b74 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } +static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) + return -ENOMEM; + + return 0; +} + /* device/<type>/<id> => <type>-<id> */ static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) { @@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = { .bus = { .name = "xen", .match = xenbus_match, + .uevent = xenbus_uevent, .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, @@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev, } DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); +static ssize_t xendev_show_modalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); +} +DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus, err = device_create_file(&xendev->dev, &dev_attr_devtype); if (err) - goto fail_remove_file; + goto fail_remove_nodename; + + err = device_create_file(&xendev->dev, &dev_attr_modalias); + if (err) + goto fail_remove_devtype; return 0; -fail_remove_file: +fail_remove_devtype: + device_remove_file(&xendev->dev, &dev_attr_devtype); +fail_remove_nodename: device_remove_file(&xendev->dev, &dev_attr_nodename); fail_unregister: device_unregister(&xendev->dev); @@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; + struct xenbus_driver *xendrv; /* * A device with no driver will never connect. We care only about @@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data) if (drv && (dev->driver != drv)) return 0; - return (xendev->state != XenbusStateConnected); + xendrv = to_xenbus_driver(dev->driver); + return (xendev->state != XenbusStateConnected || + (xendrv->is_ready && !xendrv->is_ready(xendev))); } static int exists_disconnected_device(struct device_driver *drv) diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c new file mode 100644 index 000000000000..797cb4e31f07 --- /dev/null +++ b/drivers/xen/xencomm.c @@ -0,0 +1,232 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (C) IBM Corp. 2006 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/gfp.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <xen/xencomm.h> +#include <xen/interface/xen.h> +#ifdef __ia64__ +#include <asm/xen/xencomm.h> /* for is_kern_addr() */ +#endif + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif + +static int xencomm_init(struct xencomm_desc *desc, + void *buffer, unsigned long bytes) +{ + unsigned long recorded = 0; + int i = 0; + + while ((recorded < bytes) && (i < desc->nr_addrs)) { + unsigned long vaddr = (unsigned long)buffer + recorded; + unsigned long paddr; + int offset; + int chunksz; + + offset = vaddr % PAGE_SIZE; /* handle partial pages */ + chunksz = min(PAGE_SIZE - offset, bytes - recorded); + + paddr = xencomm_vtop(vaddr); + if (paddr == ~0UL) { + printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n", + __func__, vaddr); + return -EINVAL; + } + + desc->address[i++] = paddr; + recorded += chunksz; + } + + if (recorded < bytes) { + printk(KERN_DEBUG + "%s: could only translate %ld of %ld bytes\n", + __func__, recorded, bytes); + return -ENOSPC; + } + + /* mark remaining addresses invalid (just for safety) */ + while (i < desc->nr_addrs) + desc->address[i++] = XENCOMM_INVALID; + + desc->magic = XENCOMM_MAGIC; + + return 0; +} + +static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, + void *buffer, unsigned long bytes) +{ + struct xencomm_desc *desc; + unsigned long buffer_ulong = (unsigned long)buffer; + unsigned long start = buffer_ulong & PAGE_MASK; + unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; + unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; + unsigned long size = sizeof(*desc) + + sizeof(desc->address[0]) * nr_addrs; + + /* + * slab allocator returns at least sizeof(void*) aligned pointer. + * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might + * cross page boundary. + */ + if (sizeof(*desc) > sizeof(void *)) { + unsigned long order = get_order(size); + desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, + order); + if (desc == NULL) + return NULL; + + desc->nr_addrs = + ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / + sizeof(*desc->address); + } else { + desc = kmalloc(size, gfp_mask); + if (desc == NULL) + return NULL; + + desc->nr_addrs = nr_addrs; + } + return desc; +} + +void xencomm_free(struct xencomm_handle *desc) +{ + if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { + struct xencomm_desc *desc__ = (struct xencomm_desc *)desc; + if (sizeof(*desc__) > sizeof(void *)) { + unsigned long size = sizeof(*desc__) + + sizeof(desc__->address[0]) * desc__->nr_addrs; + unsigned long order = get_order(size); + free_pages((unsigned long)__va(desc), order); + } else + kfree(__va(desc)); + } +} + +static int xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_desc **ret, gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + int rc; + + pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); + + if (bytes == 0) { + /* don't create a descriptor; Xen recognizes NULL. */ + BUG_ON(buffer != NULL); + *ret = NULL; + return 0; + } + + BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ + + desc = xencomm_alloc(gfp_mask, buffer, bytes); + if (!desc) { + printk(KERN_DEBUG "%s failure\n", "xencomm_alloc"); + return -ENOMEM; + } + + rc = xencomm_init(desc, buffer, bytes); + if (rc) { + printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc); + xencomm_free((struct xencomm_handle *)__pa(desc)); + return rc; + } + + *ret = desc; + return 0; +} + +/* check if memory address is within VMALLOC region */ +static int is_phys_contiguous(unsigned long addr) +{ + if (!is_kernel_addr(addr)) + return 0; + + return (addr < VMALLOC_START) || (addr >= VMALLOC_END); +} + +static struct xencomm_handle *xencomm_create_inline(void *ptr) +{ + unsigned long paddr; + + BUG_ON(!is_phys_contiguous((unsigned long)ptr)); + + paddr = (unsigned long)xencomm_pa(ptr); + BUG_ON(paddr & XENCOMM_INLINE_FLAG); + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); +} + +/* "mini" routine, for stack-based communications: */ +static int xencomm_create_mini(void *buffer, + unsigned long bytes, struct xencomm_mini *xc_desc, + struct xencomm_desc **ret) +{ + int rc = 0; + struct xencomm_desc *desc; + BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); + + desc = (void *)xc_desc; + + desc->nr_addrs = XENCOMM_MINI_ADDRS; + + rc = xencomm_init(desc, buffer, bytes); + if (!rc) + *ret = desc; + + return rc; +} + +struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) +{ + int rc; + struct xencomm_desc *desc; + + if (is_phys_contiguous((unsigned long)ptr)) + return xencomm_create_inline(ptr); + + rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); + + if (rc || desc == NULL) + return NULL; + + return xencomm_pa(desc); +} + +struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, + struct xencomm_mini *xc_desc) +{ + int rc; + struct xencomm_desc *desc = NULL; + + if (is_phys_contiguous((unsigned long)ptr)) + return xencomm_create_inline(ptr); + + rc = xencomm_create_mini(ptr, bytes, xc_desc, + &desc); + + if (rc) + return NULL; + + return xencomm_pa(desc); +} |