aboutsummaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/alarmtimer.c47
-rw-r--r--kernel/time/clockevents.c271
-rw-r--r--kernel/time/clocksource.c266
-rw-r--r--kernel/time/sched_clock.c212
-rw-r--r--kernel/time/tick-broadcast.c126
-rw-r--r--kernel/time/tick-common.c197
-rw-r--r--kernel/time/tick-internal.h17
-rw-r--r--kernel/time/timekeeping.c65
-rw-r--r--kernel/time/timekeeping_debug.c72
-rw-r--r--kernel/time/timekeeping_internal.h14
11 files changed, 1026 insertions, 263 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ff7d9d2ab504..9250130646f5 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
+obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
+obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f11d83b12949..eec50fcef9e4 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
+ktime_t alarm_expires_remaining(const struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ return ktime_sub(alarm->node.expires, base->gettime());
+}
+EXPORT_SYMBOL_GPL(alarm_expires_remaining);
+
#ifdef CONFIG_RTC_CLASS
/**
* alarmtimer_suspend - Suspend time callback
@@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
alarm->type = type;
alarm->state = ALARMTIMER_STATE_INACTIVE;
}
+EXPORT_SYMBOL_GPL(alarm_init);
/**
- * alarm_start - Sets an alarm to fire
+ * alarm_start - Sets an absolute alarm to fire
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
@@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start)
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
+EXPORT_SYMBOL_GPL(alarm_start);
+
+/**
+ * alarm_start_relative - Sets a relative alarm to fire
+ * @alarm: ptr to alarm to set
+ * @start: time relative to now to run the alarm
+ */
+int alarm_start_relative(struct alarm *alarm, ktime_t start)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+
+ start = ktime_add(start, base->gettime());
+ return alarm_start(alarm, start);
+}
+EXPORT_SYMBOL_GPL(alarm_start_relative);
+
+void alarm_restart(struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ unsigned long flags;
+
+ spin_lock_irqsave(&base->lock, flags);
+ hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+ hrtimer_restart(&alarm->timer);
+ alarmtimer_enqueue(base, alarm);
+ spin_unlock_irqrestore(&base->lock, flags);
+}
+EXPORT_SYMBOL_GPL(alarm_restart);
/**
* alarm_try_to_cancel - Tries to cancel an alarm timer
@@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm)
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
+EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
/**
@@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm)
cpu_relax();
}
}
+EXPORT_SYMBOL_GPL(alarm_cancel);
u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
@@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
alarm->node.expires = ktime_add(alarm->node.expires, interval);
return overrun;
}
+EXPORT_SYMBOL_GPL(alarm_forward);
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ return alarm_forward(alarm, base->gettime(), interval);
+}
+EXPORT_SYMBOL_GPL(alarm_forward_now);
/**
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index c6d6400ee137..38959c866789 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -15,20 +15,23 @@
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/notifier.h>
#include <linux/smp.h>
+#include <linux/device.h>
#include "tick-internal.h"
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
-
-/* Notification for clock events */
-static RAW_NOTIFIER_HEAD(clockevents_chain);
-
/* Protection for the above */
static DEFINE_RAW_SPINLOCK(clockevents_lock);
+/* Protection for unbind operations */
+static DEFINE_MUTEX(clockevents_mutex);
+
+struct ce_unbind {
+ struct clock_event_device *ce;
+ int res;
+};
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -232,47 +235,107 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
-/**
- * clockevents_register_notifier - register a clock events change listener
+/*
+ * Called after a notify add to make devices available which were
+ * released from the notifier call.
*/
-int clockevents_register_notifier(struct notifier_block *nb)
+static void clockevents_notify_released(void)
{
- unsigned long flags;
- int ret;
+ struct clock_event_device *dev;
- raw_spin_lock_irqsave(&clockevents_lock, flags);
- ret = raw_notifier_chain_register(&clockevents_chain, nb);
- raw_spin_unlock_irqrestore(&clockevents_lock, flags);
+ while (!list_empty(&clockevents_released)) {
+ dev = list_entry(clockevents_released.next,
+ struct clock_event_device, list);
+ list_del(&dev->list);
+ list_add(&dev->list, &clockevent_devices);
+ tick_check_new_device(dev);
+ }
+}
- return ret;
+/*
+ * Try to install a replacement clock event device
+ */
+static int clockevents_replace(struct clock_event_device *ced)
+{
+ struct clock_event_device *dev, *newdev = NULL;
+
+ list_for_each_entry(dev, &clockevent_devices, list) {
+ if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+ continue;
+
+ if (!tick_check_replacement(newdev, dev))
+ continue;
+
+ if (!try_module_get(dev->owner))
+ continue;
+
+ if (newdev)
+ module_put(newdev->owner);
+ newdev = dev;
+ }
+ if (newdev) {
+ tick_install_replacement(newdev);
+ list_del_init(&ced->list);
+ }
+ return newdev ? 0 : -EBUSY;
}
/*
- * Notify about a clock event change. Called with clockevents_lock
- * held.
+ * Called with clockevents_mutex and clockevents_lock held
*/
-static void clockevents_do_notify(unsigned long reason, void *dev)
+static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
{
- raw_notifier_call_chain(&clockevents_chain, reason, dev);
+ /* Fast track. Device is unused */
+ if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+ list_del_init(&ced->list);
+ return 0;
+ }
+
+ return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY;
}
/*
- * Called after a notify add to make devices available which were
- * released from the notifier call.
+ * SMP function call to unbind a device
*/
-static void clockevents_notify_released(void)
+static void __clockevents_unbind(void *arg)
{
- struct clock_event_device *dev;
+ struct ce_unbind *cu = arg;
+ int res;
+
+ raw_spin_lock(&clockevents_lock);
+ res = __clockevents_try_unbind(cu->ce, smp_processor_id());
+ if (res == -EAGAIN)
+ res = clockevents_replace(cu->ce);
+ cu->res = res;
+ raw_spin_unlock(&clockevents_lock);
+}
- while (!list_empty(&clockevents_released)) {
- dev = list_entry(clockevents_released.next,
- struct clock_event_device, list);
- list_del(&dev->list);
- list_add(&dev->list, &clockevent_devices);
- clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
- }
+/*
+ * Issues smp function call to unbind a per cpu device. Called with
+ * clockevents_mutex held.
+ */
+static int clockevents_unbind(struct clock_event_device *ced, int cpu)
+{
+ struct ce_unbind cu = { .ce = ced, .res = -ENODEV };
+
+ smp_call_function_single(cpu, __clockevents_unbind, &cu, 1);
+ return cu.res;
}
+/*
+ * Unbind a clockevents device.
+ */
+int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
+{
+ int ret;
+
+ mutex_lock(&clockevents_mutex);
+ ret = clockevents_unbind(ced, cpu);
+ mutex_unlock(&clockevents_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(clockevents_unbind);
+
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
@@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev)
raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
- clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
+ tick_check_new_device(dev);
clockevents_notify_released();
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
@@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
* released list and do a notify add later.
*/
if (old) {
+ module_put(old->owner);
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
@@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg)
int cpu;
raw_spin_lock_irqsave(&clockevents_lock, flags);
- clockevents_do_notify(reason, arg);
switch (reason) {
+ case CLOCK_EVT_NOTIFY_BROADCAST_ON:
+ case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
+ case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+ tick_broadcast_on_off(reason, arg);
+ break;
+
+ case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
+ case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
+ tick_broadcast_oneshot_control(reason);
+ break;
+
+ case CLOCK_EVT_NOTIFY_CPU_DYING:
+ tick_handover_do_timer(arg);
+ break;
+
+ case CLOCK_EVT_NOTIFY_SUSPEND:
+ tick_suspend();
+ tick_suspend_broadcast();
+ break;
+
+ case CLOCK_EVT_NOTIFY_RESUME:
+ tick_resume();
+ break;
+
case CLOCK_EVT_NOTIFY_CPU_DEAD:
+ tick_shutdown_broadcast_oneshot(arg);
+ tick_shutdown_broadcast(arg);
+ tick_shutdown(arg);
/*
* Unregister the clock event devices which were
* released from the users in the notify chain.
@@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg)
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
+
+#ifdef CONFIG_SYSFS
+struct bus_type clockevents_subsys = {
+ .name = "clockevents",
+ .dev_name = "clockevent",
+};
+
+static DEFINE_PER_CPU(struct device, tick_percpu_dev);
+static struct tick_device *tick_get_tick_dev(struct device *dev);
+
+static ssize_t sysfs_show_current_tick_dev(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct tick_device *td;
+ ssize_t count = 0;
+
+ raw_spin_lock_irq(&clockevents_lock);
+ td = tick_get_tick_dev(dev);
+ if (td && td->evtdev)
+ count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name);
+ raw_spin_unlock_irq(&clockevents_lock);
+ return count;
+}
+static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
+
+/* We don't support the abomination of removable broadcast devices */
+static ssize_t sysfs_unbind_tick_dev(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ char name[CS_NAME_LEN];
+ size_t ret = sysfs_get_uname(buf, name, count);
+ struct clock_event_device *ce;
+
+ if (ret < 0)
+ return ret;
+
+ ret = -ENODEV;
+ mutex_lock(&clockevents_mutex);
+ raw_spin_lock_irq(&clockevents_lock);
+ list_for_each_entry(ce, &clockevent_devices, list) {
+ if (!strcmp(ce->name, name)) {
+ ret = __clockevents_try_unbind(ce, dev->id);
+ break;
+ }
+ }
+ raw_spin_unlock_irq(&clockevents_lock);
+ /*
+ * We hold clockevents_mutex, so ce can't go away
+ */
+ if (ret == -EAGAIN)
+ ret = clockevents_unbind(ce, dev->id);
+ mutex_unlock(&clockevents_mutex);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+static struct device tick_bc_dev = {
+ .init_name = "broadcast",
+ .id = 0,
+ .bus = &clockevents_subsys,
+};
+
+static struct tick_device *tick_get_tick_dev(struct device *dev)
+{
+ return dev == &tick_bc_dev ? tick_get_broadcast_device() :
+ &per_cpu(tick_cpu_device, dev->id);
+}
+
+static __init int tick_broadcast_init_sysfs(void)
+{
+ int err = device_register(&tick_bc_dev);
+
+ if (!err)
+ err = device_create_file(&tick_bc_dev, &dev_attr_current_device);
+ return err;
+}
+#else
+static struct tick_device *tick_get_tick_dev(struct device *dev)
+{
+ return &per_cpu(tick_cpu_device, dev->id);
+}
+static inline int tick_broadcast_init_sysfs(void) { return 0; }
#endif
+
+static int __init tick_init_sysfs(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct device *dev = &per_cpu(tick_percpu_dev, cpu);
+ int err;
+
+ dev->id = cpu;
+ dev->bus = &clockevents_subsys;
+ err = device_register(dev);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_current_device);
+ if (!err)
+ err = device_create_file(dev, &dev_attr_unbind_device);
+ if (err)
+ return err;
+ }
+ return tick_broadcast_init_sysfs();
+}
+
+static int __init clockevents_init_sysfs(void)
+{
+ int err = subsys_system_register(&clockevents_subsys, NULL);
+
+ if (!err)
+ err = tick_init_sysfs();
+ return err;
+}
+device_initcall(clockevents_init_sysfs);
+#endif /* SYSFS */
+
+#endif /* GENERIC_CLOCK_EVENTS */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c9583382141a..50a8736757f3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -31,6 +31,8 @@
#include <linux/tick.h>
#include <linux/kthread.h>
+#include "tick-internal.h"
+
void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc,
u64 start_tstamp)
@@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
-static char override_name[32];
+static char override_name[CS_NAME_LEN];
static int finished_booting;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
+static void clocksource_select(void);
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
@@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
(cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+ /* Mark it valid for high-res. */
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+
+ /*
+ * clocksource_done_booting() will sort it if
+ * finished_booting is not set yet.
+ */
+ if (!finished_booting)
+ continue;
+
/*
- * We just marked the clocksource as highres-capable,
- * notify the rest of the system as well so that we
- * transition into high-res mode:
+ * If this is not the current clocksource let
+ * the watchdog thread reselect it. Due to the
+ * change to high res this clocksource might
+ * be preferred now. If it is the current
+ * clocksource let the tick code know about
+ * that change.
*/
- tick_clock_notify();
+ if (cs != curr_clocksource) {
+ cs->flags |= CLOCK_SOURCE_RESELECT;
+ schedule_work(&watchdog_work);
+ } else {
+ tick_clock_notify();
+ }
}
}
@@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
- struct clocksource *tmp;
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
- if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
- /* cs is a watched clocksource. */
- list_del_init(&cs->wd_list);
- } else if (cs == watchdog) {
- /* Reset watchdog cycles */
- clocksource_reset_watchdog();
- /* Current watchdog is removed. Find an alternative. */
- watchdog = NULL;
- list_for_each_entry(tmp, &clocksource_list, list) {
- if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
- continue;
- if (!watchdog || tmp->rating > watchdog->rating)
- watchdog = tmp;
+ if (cs != watchdog) {
+ if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
+ /* cs is a watched clocksource. */
+ list_del_init(&cs->wd_list);
+ /* Check if the watchdog timer needs to be stopped. */
+ clocksource_stop_watchdog();
}
}
- cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
- /* Check if the watchdog timer needs to be stopped. */
- clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
}
-static int clocksource_watchdog_kthread(void *data)
+static int __clocksource_watchdog_kthread(void)
{
struct clocksource *cs, *tmp;
unsigned long flags;
LIST_HEAD(unstable);
+ int select = 0;
- mutex_lock(&clocksource_mutex);
spin_lock_irqsave(&watchdog_lock, flags);
- list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
+ list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
list_add(&cs->wd_list, &unstable);
+ select = 1;
}
+ if (cs->flags & CLOCK_SOURCE_RESELECT) {
+ cs->flags &= ~CLOCK_SOURCE_RESELECT;
+ select = 1;
+ }
+ }
/* Check if the watchdog timer needs to be stopped. */
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data)
list_del_init(&cs->wd_list);
__clocksource_change_rating(cs, 0);
}
+ return select;
+}
+
+static int clocksource_watchdog_kthread(void *data)
+{
+ mutex_lock(&clocksource_mutex);
+ if (__clocksource_watchdog_kthread())
+ clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
+static bool clocksource_is_watchdog(struct clocksource *cs)
+{
+ return cs == watchdog;
+}
+
#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
static void clocksource_enqueue_watchdog(struct clocksource *cs)
@@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
-static inline int clocksource_watchdog_kthread(void *data) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
+static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
@@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
-/**
- * clocksource_select - Select the best clocksource available
- *
- * Private function. Must hold clocksource_mutex when called.
- *
- * Select the clocksource with the best rating, or the clocksource,
- * which is selected by userspace override.
- */
-static void clocksource_select(void)
+static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
- struct clocksource *best, *cs;
+ struct clocksource *cs;
if (!finished_booting || list_empty(&clocksource_list))
+ return NULL;
+
+ /*
+ * We pick the clocksource with the highest rating. If oneshot
+ * mode is active, we pick the highres valid clocksource with
+ * the best rating.
+ */
+ list_for_each_entry(cs, &clocksource_list, list) {
+ if (skipcur && cs == curr_clocksource)
+ continue;
+ if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+ continue;
+ return cs;
+ }
+ return NULL;
+}
+
+static void __clocksource_select(bool skipcur)
+{
+ bool oneshot = tick_oneshot_mode_active();
+ struct clocksource *best, *cs;
+
+ /* Find the best suitable clocksource */
+ best = clocksource_find_best(oneshot, skipcur);
+ if (!best)
return;
- /* First clocksource on the list has the best rating. */
- best = list_first_entry(&clocksource_list, struct clocksource, list);
+
/* Check for the override clocksource. */
list_for_each_entry(cs, &clocksource_list, list) {
+ if (skipcur && cs == curr_clocksource)
+ continue;
if (strcmp(cs->name, override_name) != 0)
continue;
/*
@@ -578,8 +625,7 @@ static void clocksource_select(void)
* capable clocksource if the tick code is in oneshot
* mode (highres or nohz)
*/
- if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
- tick_oneshot_mode_active()) {
+ if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
/* Override clocksource cannot be used. */
printk(KERN_WARNING "Override clocksource %s is not "
"HRT compatible. Cannot switch while in "
@@ -590,16 +636,35 @@ static void clocksource_select(void)
best = cs;
break;
}
- if (curr_clocksource != best) {
- printk(KERN_INFO "Switching to clocksource %s\n", best->name);
+
+ if (curr_clocksource != best && !timekeeping_notify(best)) {
+ pr_info("Switched to clocksource %s\n", best->name);
curr_clocksource = best;
- timekeeping_notify(curr_clocksource);
}
}
+/**
+ * clocksource_select - Select the best clocksource available
+ *
+ * Private function. Must hold clocksource_mutex when called.
+ *
+ * Select the clocksource with the best rating, or the clocksource,
+ * which is selected by userspace override.
+ */
+static void clocksource_select(void)
+{
+ return __clocksource_select(false);
+}
+
+static void clocksource_select_fallback(void)
+{
+ return __clocksource_select(true);
+}
+
#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
+static inline void clocksource_select_fallback(void) { }
#endif
@@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void)
{
mutex_lock(&clocksource_mutex);
curr_clocksource = clocksource_default_clock();
- mutex_unlock(&clocksource_mutex);
-
finished_booting = 1;
-
/*
* Run the watchdog first to eliminate unstable clock sources
*/
- clocksource_watchdog_kthread(NULL);
-
- mutex_lock(&clocksource_mutex);
+ __clocksource_watchdog_kthread();
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
@@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
list_del(&cs->list);
cs->rating = rating;
clocksource_enqueue(cs);
- clocksource_select();
}
/**
@@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
{
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
+ clocksource_select();
mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);
+/*
+ * Unbind clocksource @cs. Called with clocksource_mutex held
+ */
+static int clocksource_unbind(struct clocksource *cs)
+{
+ /*
+ * I really can't convince myself to support this on hardware
+ * designed by lobotomized monkeys.
+ */
+ if (clocksource_is_watchdog(cs))
+ return -EBUSY;
+
+ if (cs == curr_clocksource) {
+ /* Select and try to install a replacement clock source */
+ clocksource_select_fallback();
+ if (curr_clocksource == cs)
+ return -EBUSY;
+ }
+ clocksource_dequeue_watchdog(cs);
+ list_del_init(&cs->list);
+ return 0;
+}
+
/**
* clocksource_unregister - remove a registered clocksource
* @cs: clocksource to be unregistered
*/
-void clocksource_unregister(struct clocksource *cs)
+int clocksource_unregister(struct clocksource *cs)
{
+ int ret = 0;
+
mutex_lock(&clocksource_mutex);
- clocksource_dequeue_watchdog(cs);
- list_del(&cs->list);
- clocksource_select();
+ if (!list_empty(&cs->list))
+ ret = clocksource_unbind(cs);
mutex_unlock(&clocksource_mutex);
+ return ret;
}
EXPORT_SYMBOL(clocksource_unregister);
@@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev,
return count;
}
+size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
+{
+ size_t ret = cnt;
+
+ /* strings from sysfs write are not 0 terminated! */
+ if (!cnt || cnt >= CS_NAME_LEN)
+ return -EINVAL;
+
+ /* strip of \n: */
+ if (buf[cnt-1] == '\n')
+ cnt--;
+ if (cnt > 0)
+ memcpy(dst, buf, cnt);
+ dst[cnt] = 0;
+ return ret;
+}
+
/**
* sysfs_override_clocksource - interface for manually overriding clocksource
* @dev: unused
@@ -822,22 +924,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- size_t ret = count;
-
- /* strings from sysfs write are not 0 terminated! */
- if (count >= sizeof(override_name))
- return -EINVAL;
-
- /* strip of \n: */
- if (buf[count-1] == '\n')
- count--;
+ size_t ret;
mutex_lock(&clocksource_mutex);
- if (count > 0)
- memcpy(override_name, buf, count);
- override_name[count] = 0;
- clocksource_select();
+ ret = sysfs_get_uname(buf, override_name, count);
+ if (ret >= 0)
+ clocksource_select();
mutex_unlock(&clocksource_mutex);
@@ -845,6 +938,40 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
}
/**
+ * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
+ * @dev: unused
+ * @attr: unused
+ * @buf: unused
+ * @count: length of buffer
+ *
+ * Takes input from sysfs interface for manually unbinding a clocksource.
+ */
+static ssize_t sysfs_unbind_clocksource(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct clocksource *cs;
+ char name[CS_NAME_LEN];
+ size_t ret;
+
+ ret = sysfs_get_uname(buf, name, count);
+ if (ret < 0)
+ return ret;
+
+ ret = -ENODEV;
+ mutex_lock(&clocksource_mutex);
+ list_for_each_entry(cs, &clocksource_list, list) {
+ if (strcmp(cs->name, name))
+ continue;
+ ret = clocksource_unbind(cs);
+ break;
+ }
+ mutex_unlock(&clocksource_mutex);
+
+ return ret ? ret : count;
+}
+
+/**
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
* @dev: unused
* @attr: unused
@@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev,
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
sysfs_override_clocksource);
+static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
+
static DEVICE_ATTR(available_clocksource, 0444,
sysfs_show_available_clocksources, NULL);
@@ -910,6 +1039,9 @@ static int __init init_clocksource_sysfs(void)
&device_clocksource,
&dev_attr_current_clocksource);
if (!error)
+ error = device_create_file(&device_clocksource,
+ &dev_attr_unbind_clocksource);
+ if (!error)
error = device_create_file(
&device_clocksource,
&dev_attr_available_clocksource);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
new file mode 100644
index 000000000000..a326f27d7f09
--- /dev/null
+++ b/kernel/time/sched_clock.c
@@ -0,0 +1,212 @@
+/*
+ * sched_clock.c: support for extending counters to full 64-bit ns counter
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/syscore_ops.h>
+#include <linux/timer.h>
+#include <linux/sched_clock.h>
+
+struct clock_data {
+ u64 epoch_ns;
+ u32 epoch_cyc;
+ u32 epoch_cyc_copy;
+ unsigned long rate;
+ u32 mult;
+ u32 shift;
+ bool suspended;
+};
+
+static void sched_clock_poll(unsigned long wrap_ticks);
+static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
+static int irqtime = -1;
+
+core_param(irqtime, irqtime, int, 0400);
+
+static struct clock_data cd = {
+ .mult = NSEC_PER_SEC / HZ,
+};
+
+static u32 __read_mostly sched_clock_mask = 0xffffffff;
+
+static u32 notrace jiffy_sched_clock_read(void)
+{
+ return (u32)(jiffies - INITIAL_JIFFIES);
+}
+
+static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+
+static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+ return (cyc * mult) >> shift;
+}
+
+static unsigned long long notrace sched_clock_32(void)
+{
+ u64 epoch_ns;
+ u32 epoch_cyc;
+ u32 cyc;
+
+ if (cd.suspended)
+ return cd.epoch_ns;
+
+ /*
+ * Load the epoch_cyc and epoch_ns atomically. We do this by
+ * ensuring that we always write epoch_cyc, epoch_ns and
+ * epoch_cyc_copy in strict order, and read them in strict order.
+ * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
+ * the middle of an update, and we should repeat the load.
+ */
+ do {
+ epoch_cyc = cd.epoch_cyc;
+ smp_rmb();
+ epoch_ns = cd.epoch_ns;
+ smp_rmb();
+ } while (epoch_cyc != cd.epoch_cyc_copy);
+
+ cyc = read_sched_clock();
+ cyc = (cyc - epoch_cyc) & sched_clock_mask;
+ return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+}
+
+/*
+ * Atomically update the sched_clock epoch.
+ */
+static void notrace update_sched_clock(void)
+{
+ unsigned long flags;
+ u32 cyc;
+ u64 ns;
+
+ cyc = read_sched_clock();
+ ns = cd.epoch_ns +
+ cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
+ cd.mult, cd.shift);
+ /*
+ * Write epoch_cyc and epoch_ns in a way that the update is
+ * detectable in cyc_to_fixed_sched_clock().
+ */
+ raw_local_irq_save(flags);
+ cd.epoch_cyc_copy = cyc;
+ smp_wmb();
+ cd.epoch_ns = ns;
+ smp_wmb();
+ cd.epoch_cyc = cyc;
+ raw_local_irq_restore(flags);
+}
+
+static void sched_clock_poll(unsigned long wrap_ticks)
+{
+ mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
+ update_sched_clock();
+}
+
+void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+{
+ unsigned long r, w;
+ u64 res, wrap;
+ char r_unit;
+
+ if (cd.rate > rate)
+ return;
+
+ BUG_ON(bits > 32);
+ WARN_ON(!irqs_disabled());
+ read_sched_clock = read;
+ sched_clock_mask = (1 << bits) - 1;
+ cd.rate = rate;
+
+ /* calculate the mult/shift to convert counter ticks to ns. */
+ clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
+
+ r = rate;
+ if (r >= 4000000) {
+ r /= 1000000;
+ r_unit = 'M';
+ } else if (r >= 1000) {
+ r /= 1000;
+ r_unit = 'k';
+ } else
+ r_unit = ' ';
+
+ /* calculate how many ns until we wrap */
+ wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
+ do_div(wrap, NSEC_PER_MSEC);
+ w = wrap;
+
+ /* calculate the ns resolution of this counter */
+ res = cyc_to_ns(1ULL, cd.mult, cd.shift);
+ pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
+ bits, r, r_unit, res, w);
+
+ /*
+ * Start the timer to keep sched_clock() properly updated and
+ * sets the initial epoch.
+ */
+ sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
+ update_sched_clock();
+
+ /*
+ * Ensure that sched_clock() starts off at 0ns
+ */
+ cd.epoch_ns = 0;
+
+ /* Enable IRQ time accounting if we have a fast enough sched_clock */
+ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
+ enable_sched_clock_irqtime();
+
+ pr_debug("Registered %pF as sched_clock source\n", read);
+}
+
+unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
+
+unsigned long long notrace sched_clock(void)
+{
+ return sched_clock_func();
+}
+
+void __init sched_clock_postinit(void)
+{
+ /*
+ * If no sched_clock function has been provided at that point,
+ * make it the final one one.
+ */
+ if (read_sched_clock == jiffy_sched_clock_read)
+ setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+
+ sched_clock_poll(sched_clock_timer.data);
+}
+
+static int sched_clock_suspend(void)
+{
+ sched_clock_poll(sched_clock_timer.data);
+ cd.suspended = true;
+ return 0;
+}
+
+static void sched_clock_resume(void)
+{
+ cd.epoch_cyc = read_sched_clock();
+ cd.epoch_cyc_copy = cd.epoch_cyc;
+ cd.suspended = false;
+}
+
+static struct syscore_ops sched_clock_ops = {
+ .suspend = sched_clock_suspend,
+ .resume = sched_clock_resume,
+};
+
+static int __init sched_clock_syscore_init(void)
+{
+ register_syscore_ops(&sched_clock_ops);
+ return 0;
+}
+device_initcall(sched_clock_syscore_init);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 20d6fba70652..6d3f91631de6 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -19,6 +19,7 @@
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/module.h>
#include "tick-internal.h"
@@ -29,6 +30,7 @@
static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
+static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
@@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
/*
* Check, if the device can be utilized as broadcast device:
*/
-int tick_check_broadcast_device(struct clock_event_device *dev)
+static bool tick_check_broadcast_device(struct clock_event_device *curdev,
+ struct clock_event_device *newdev)
+{
+ if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
+ (newdev->features & CLOCK_EVT_FEAT_C3STOP))
+ return false;
+
+ if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
+ !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+ return false;
+
+ return !curdev || newdev->rating > curdev->rating;
+}
+
+/*
+ * Conditionally install/replace broadcast device
+ */
+void tick_install_broadcast_device(struct clock_event_device *dev)
{
struct clock_event_device *cur = tick_broadcast_device.evtdev;
- if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
- (tick_broadcast_device.evtdev &&
- tick_broadcast_device.evtdev->rating >= dev->rating) ||
- (dev->features & CLOCK_EVT_FEAT_C3STOP))
- return 0;
+ if (!tick_check_broadcast_device(cur, dev))
+ return;
- clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+ if (!try_module_get(dev->owner))
+ return;
+
+ clockevents_exchange_device(cur, dev);
if (cur)
cur->event_handler = clockevents_handle_noop;
tick_broadcast_device.evtdev = dev;
@@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
*/
if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_clock_notify();
- return 1;
}
/*
@@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
*/
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
unsigned long flags;
- int ret = 0;
+ int ret;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -138,20 +157,59 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
dev->event_handler = tick_handle_periodic;
tick_device_setup_broadcast_func(dev);
cpumask_set_cpu(cpu, tick_broadcast_mask);
- tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
+ tick_broadcast_start_periodic(bc);
ret = 1;
} else {
/*
- * When the new device is not affected by the stop
- * feature and the cpu is marked in the broadcast mask
- * then clear the broadcast bit.
+ * Clear the broadcast bit for this cpu if the
+ * device is not power state affected.
*/
- if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
- int cpu = smp_processor_id();
+ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
cpumask_clear_cpu(cpu, tick_broadcast_mask);
- tick_broadcast_clear_oneshot(cpu);
- } else {
+ else
tick_device_setup_broadcast_func(dev);
+
+ /*
+ * Clear the broadcast bit if the CPU is not in
+ * periodic broadcast on state.
+ */
+ if (!cpumask_test_cpu(cpu, tick_broadcast_on))
+ cpumask_clear_cpu(cpu, tick_broadcast_mask);
+
+ switch (tick_broadcast_device.mode) {
+ case TICKDEV_MODE_ONESHOT:
+ /*
+ * If the system is in oneshot mode we can
+ * unconditionally clear the oneshot mask bit,
+ * because the CPU is running and therefore
+ * not in an idle state which causes the power
+ * state affected device to stop. Let the
+ * caller initialize the device.
+ */
+ tick_broadcast_clear_oneshot(cpu);
+ ret = 0;
+ break;
+
+ case TICKDEV_MODE_PERIODIC:
+ /*
+ * If the system is in periodic mode, check
+ * whether the broadcast device can be
+ * switched off now.
+ */
+ if (cpumask_empty(tick_broadcast_mask) && bc)
+ clockevents_shutdown(bc);
+ /*
+ * If we kept the cpu in the broadcast mask,
+ * tell the caller to leave the per cpu device
+ * in shutdown state. The periodic interrupt
+ * is delivered by the broadcast device.
+ */
+ ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
+ break;
+ default:
+ /* Nothing to do */
+ ret = 0;
+ break;
}
}
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@ -281,6 +339,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+ cpumask_set_cpu(cpu, tick_broadcast_on);
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
@@ -290,8 +349,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
tick_broadcast_force = 1;
break;
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
- if (!tick_broadcast_force &&
- cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
+ if (tick_broadcast_force)
+ break;
+ cpumask_clear_cpu(cpu, tick_broadcast_on);
+ if (!tick_device_is_functional(dev))
+ break;
+ if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
@@ -349,6 +412,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
bc = tick_broadcast_device.evtdev;
cpumask_clear_cpu(cpu, tick_broadcast_mask);
+ cpumask_clear_cpu(cpu, tick_broadcast_on);
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpumask_empty(tick_broadcast_mask))
@@ -475,7 +539,15 @@ void tick_check_oneshot_broadcast(int cpu)
if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
- clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+ /*
+ * We might be in the middle of switching over from
+ * periodic to oneshot. If the CPU has not yet
+ * switched over, leave the device alone.
+ */
+ if (td->mode == TICKDEV_MODE_ONESHOT) {
+ clockevents_set_mode(td->evtdev,
+ CLOCK_EVT_MODE_ONESHOT);
+ }
}
}
@@ -522,6 +594,13 @@ again:
cpumask_clear(tick_broadcast_force_mask);
/*
+ * Sanity check. Catch the case where we try to broadcast to
+ * offline cpus.
+ */
+ if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
+ cpumask_and(tmpmask, tmpmask, cpu_online_mask);
+
+ /*
* Wakeup the cpus which have an expired event.
*/
tick_do_broadcast(tmpmask);
@@ -761,10 +840,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
- * Clear the broadcast mask flag for the dead cpu, but do not
- * stop the broadcast device!
+ * Clear the broadcast masks for the dead cpu, but do not stop
+ * the broadcast device!
*/
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
+ cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
+ cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
@@ -792,6 +873,7 @@ bool tick_broadcast_oneshot_available(void)
void __init tick_broadcast_init(void)
{
zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 5d3fb100bc06..64522ecdfe0e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,6 +18,7 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
+#include <linux/module.h>
#include <asm/irq_regs.h>
@@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
ktime_t tick_next_period;
ktime_t tick_period;
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-static DEFINE_RAW_SPINLOCK(tick_device_lock);
/*
* Debugging: see timer_list.c
@@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td,
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
- * way.
+ * way. This function also returns !=0 when we keep the
+ * current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
@@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td,
tick_setup_oneshot(newdev, handler, next_event);
}
+void tick_install_replacement(struct clock_event_device *newdev)
+{
+ struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+ int cpu = smp_processor_id();
+
+ clockevents_exchange_device(td->evtdev, newdev);
+ tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
+ if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
+ tick_oneshot_notify();
+}
+
+static bool tick_check_percpu(struct clock_event_device *curdev,
+ struct clock_event_device *newdev, int cpu)
+{
+ if (!cpumask_test_cpu(cpu, newdev->cpumask))
+ return false;
+ if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
+ return true;
+ /* Check if irq affinity can be set */
+ if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
+ return false;
+ /* Prefer an existing cpu local device */
+ if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
+ return false;
+ return true;
+}
+
+static bool tick_check_preferred(struct clock_event_device *curdev,
+ struct clock_event_device *newdev)
+{
+ /* Prefer oneshot capable device */
+ if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
+ if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
+ return false;
+ if (tick_oneshot_mode_active())
+ return false;
+ }
+
+ /*
+ * Use the higher rated one, but prefer a CPU local device with a lower
+ * rating than a non-CPU local device
+ */
+ return !curdev ||
+ newdev->rating > curdev->rating ||
+ !cpumask_equal(curdev->cpumask, newdev->cpumask);
+}
+
+/*
+ * Check whether the new device is a better fit than curdev. curdev
+ * can be NULL !
+ */
+bool tick_check_replacement(struct clock_event_device *curdev,
+ struct clock_event_device *newdev)
+{
+ if (tick_check_percpu(curdev, newdev, smp_processor_id()))
+ return false;
+
+ return tick_check_preferred(curdev, newdev);
+}
+
/*
- * Check, if the new registered device should be used.
+ * Check, if the new registered device should be used. Called with
+ * clockevents_lock held and interrupts disabled.
*/
-static int tick_check_new_device(struct clock_event_device *newdev)
+void tick_check_new_device(struct clock_event_device *newdev)
{
struct clock_event_device *curdev;
struct tick_device *td;
- int cpu, ret = NOTIFY_OK;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&tick_device_lock, flags);
+ int cpu;
cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev)
curdev = td->evtdev;
/* cpu local device ? */
- if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
-
- /*
- * If the cpu affinity of the device interrupt can not
- * be set, ignore it.
- */
- if (!irq_can_set_affinity(newdev->irq))
- goto out_bc;
+ if (!tick_check_percpu(curdev, newdev, cpu))
+ goto out_bc;
- /*
- * If we have a cpu local device already, do not replace it
- * by a non cpu local device
- */
- if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
- goto out_bc;
- }
+ /* Preference decision */
+ if (!tick_check_preferred(curdev, newdev))
+ goto out_bc;
- /*
- * If we have an active device, then check the rating and the oneshot
- * feature.
- */
- if (curdev) {
- /*
- * Prefer one shot capable devices !
- */
- if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
- !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
- goto out_bc;
- /*
- * Check the rating
- */
- if (curdev->rating >= newdev->rating)
- goto out_bc;
- }
+ if (!try_module_get(newdev->owner))
+ return;
/*
* Replace the eventually existing device by the new
@@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev)
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
-
- raw_spin_unlock_irqrestore(&tick_device_lock, flags);
- return NOTIFY_STOP;
+ return;
out_bc:
/*
* Can the new device be used as a broadcast device ?
*/
- if (tick_check_broadcast_device(newdev))
- ret = NOTIFY_STOP;
-
- raw_spin_unlock_irqrestore(&tick_device_lock, flags);
-
- return ret;
+ tick_install_broadcast_device(newdev);
}
/*
@@ -294,7 +321,7 @@ out_bc:
*
* Called with interrupts disabled.
*/
-static void tick_handover_do_timer(int *cpup)
+void tick_handover_do_timer(int *cpup)
{
if (*cpup == tick_do_timer_cpu) {
int cpu = cpumask_first(cpu_online_mask);
@@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup)
* access the hardware device itself.
* We just set the mode and remove it from the lists.
*/
-static void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int *cpup)
{
struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
struct clock_event_device *dev = td->evtdev;
- unsigned long flags;
- raw_spin_lock_irqsave(&tick_device_lock, flags);
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
/*
@@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup)
dev->event_handler = clockevents_handle_noop;
td->evtdev = NULL;
}
- raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
-static void tick_suspend(void)
+void tick_suspend(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
- unsigned long flags;
- raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_shutdown(td->evtdev);
- raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
-static void tick_resume(void)
+void tick_resume(void)
{
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
- unsigned long flags;
int broadcast = tick_resume_broadcast();
- raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
if (!broadcast) {
@@ -357,68 +376,12 @@ static void tick_resume(void)
else
tick_resume_oneshot();
}
- raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
-/*
- * Notification about clock event devices
- */
-static int tick_notify(struct notifier_block *nb, unsigned long reason,
- void *dev)
-{
- switch (reason) {
-
- case CLOCK_EVT_NOTIFY_ADD:
- return tick_check_new_device(dev);
-
- case CLOCK_EVT_NOTIFY_BROADCAST_ON:
- case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
- case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
- tick_broadcast_on_off(reason, dev);
- break;
-
- case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
- case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
- tick_broadcast_oneshot_control(reason);
- break;
-
- case CLOCK_EVT_NOTIFY_CPU_DYING:
- tick_handover_do_timer(dev);
- break;
-
- case CLOCK_EVT_NOTIFY_CPU_DEAD:
- tick_shutdown_broadcast_oneshot(dev);
- tick_shutdown_broadcast(dev);
- tick_shutdown(dev);
- break;
-
- case CLOCK_EVT_NOTIFY_SUSPEND:
- tick_suspend();
- tick_suspend_broadcast();
- break;
-
- case CLOCK_EVT_NOTIFY_RESUME:
- tick_resume();
- break;
-
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block tick_notifier = {
- .notifier_call = tick_notify,
-};
-
/**
* tick_init - initialize the tick control
- *
- * Register the notifier with the clockevents framework
*/
void __init tick_init(void)
{
- clockevents_register_notifier(&tick_notifier);
tick_broadcast_init();
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f0299eae4602..bc906cad709b 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,6 +6,8 @@
extern seqlock_t jiffies_lock;
+#define CS_NAME_LEN 32
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
@@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
+extern void tick_check_new_device(struct clock_event_device *dev);
+extern void tick_handover_do_timer(int *cpup);
+extern void tick_shutdown(unsigned int *cpup);
+extern void tick_suspend(void);
+extern void tick_resume(void);
+extern bool tick_check_replacement(struct clock_event_device *curdev,
+ struct clock_event_device *newdev);
+extern void tick_install_replacement(struct clock_event_device *dev);
extern void clockevents_shutdown(struct clock_event_device *dev);
+extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
+
/*
* NO_HZ / high resolution timer shared code
*/
@@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; }
*/
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern int tick_check_broadcast_device(struct clock_event_device *dev);
+extern void tick_install_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
@@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
#else /* !BROADCAST */
-static inline int tick_check_broadcast_device(struct clock_event_device *dev)
+static inline void tick_install_broadcast_device(struct clock_event_device *dev)
{
- return 0;
}
static inline int tick_is_broadcast_device(struct clock_event_device *dev)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index baeeb5c87cf1..48b9fffabdc2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -25,6 +25,11 @@
#include "tick-internal.h"
#include "ntp_internal.h"
+#include "timekeeping_internal.h"
+
+#define TK_CLEAR_NTP (1 << 0)
+#define TK_MIRROR (1 << 1)
+#define TK_CLOCK_WAS_SET (1 << 2)
static struct timekeeper timekeeper;
static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
-static void update_pvclock_gtod(struct timekeeper *tk)
+static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
{
- raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+ raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
}
/**
@@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
raw_spin_lock_irqsave(&timekeeper_lock, flags);
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
- update_pvclock_gtod(tk);
+ update_pvclock_gtod(tk, true);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
return ret;
@@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
/* must hold timekeeper_lock */
-static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
+static void timekeeping_update(struct timekeeper *tk, unsigned int action)
{
- if (clearntp) {
+ if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
ntp_clear();
}
update_vsyscall(tk);
- update_pvclock_gtod(tk);
+ update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
- if (mirror)
+ if (action & TK_MIRROR)
memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
}
@@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv)
tk_set_xtime(tk, tv);
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts)
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
error: /* even if we error out, we forwarded the time, so call update */
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -627,13 +632,22 @@ static int change_clocksource(void *data)
write_seqcount_begin(&timekeeper_seq);
timekeeping_forward_now(tk);
- if (!new->enable || new->enable(new) == 0) {
- old = tk->clock;
- tk_setup_internals(tk, new);
- if (old->disable)
- old->disable(old);
+ /*
+ * If the cs is in module, get a module reference. Succeeds
+ * for built-in code (owner == NULL) as well.
+ */
+ if (try_module_get(new->owner)) {
+ if (!new->enable || new->enable(new) == 0) {
+ old = tk->clock;
+ tk_setup_internals(tk, new);
+ if (old->disable)
+ old->disable(old);
+ module_put(old->owner);
+ } else {
+ module_put(new->owner);
+ }
}
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -648,14 +662,15 @@ static int change_clocksource(void *data)
* This function is called from clocksource.c after a new, better clock
* source has been registered. The caller holds the clocksource_mutex.
*/
-void timekeeping_notify(struct clocksource *clock)
+int timekeeping_notify(struct clocksource *clock)
{
struct timekeeper *tk = &timekeeper;
if (tk->clock == clock)
- return;
+ return 0;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
+ return tk->clock == clock ? 0 : -1;
}
/**
@@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
+ tk_debug_account_sleep_time(delta);
}
/**
@@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
__timekeeping_inject_sleeptime(tk, delta);
- timekeeping_update(tk, true, true);
+ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -954,7 +970,7 @@ static void timekeeping_resume(void)
tk->cycle_last = clock->cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
- timekeeping_update(tk, false, true);
+ timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1236,9 +1252,10 @@ out_adjust:
* It also calls into the NTP code to handle leapsecond processing.
*
*/
-static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
+static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+ unsigned int action = 0;
while (tk->xtime_nsec >= nsecps) {
int leap;
@@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
clock_was_set_delayed();
+ action = TK_CLOCK_WAS_SET;
}
}
+ return action;
}
/**
@@ -1347,6 +1366,7 @@ static void update_wall_time(void)
struct timekeeper *tk = &shadow_timekeeper;
cycle_t offset;
int shift = 0, maxshift;
+ unsigned int action;
unsigned long flags;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1399,7 +1419,7 @@ static void update_wall_time(void)
* Finally, make sure that after the rounding
* xtime_nsec isn't larger than NSEC_PER_SEC
*/
- accumulate_nsecs_to_secs(tk);
+ action = accumulate_nsecs_to_secs(tk);
write_seqcount_begin(&timekeeper_seq);
/* Update clock->cycle_last with the new value */
@@ -1415,7 +1435,7 @@ static void update_wall_time(void)
* updating.
*/
memcpy(real_tk, tk, sizeof(*tk));
- timekeeping_update(real_tk, false, false);
+ timekeeping_update(real_tk, action);
write_seqcount_end(&timekeeper_seq);
out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc)
if (tai != orig_tai) {
__timekeeping_set_tai_offset(tk, tai);
+ update_pvclock_gtod(tk, true);
clock_was_set_delayed();
}
write_seqcount_end(&timekeeper_seq);
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
new file mode 100644
index 000000000000..802433a4f5eb
--- /dev/null
+++ b/kernel/time/timekeeping_debug.c
@@ -0,0 +1,72 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/time.h>
+
+static unsigned int sleep_time_bin[32] = {0};
+
+static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
+{
+ unsigned int bin;
+ seq_puts(s, " time (secs) count\n");
+ seq_puts(s, "------------------------------\n");
+ for (bin = 0; bin < 32; bin++) {
+ if (sleep_time_bin[bin] == 0)
+ continue;
+ seq_printf(s, "%10u - %-10u %4u\n",
+ bin ? 1 << (bin - 1) : 0, 1 << bin,
+ sleep_time_bin[bin]);
+ }
+ return 0;
+}
+
+static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, tk_debug_show_sleep_time, NULL);
+}
+
+static const struct file_operations tk_debug_sleep_time_fops = {
+ .open = tk_debug_sleep_time_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init tk_debug_sleep_time_init(void)
+{
+ struct dentry *d;
+
+ d = debugfs_create_file("sleep_time", 0444, NULL, NULL,
+ &tk_debug_sleep_time_fops);
+ if (!d) {
+ pr_err("Failed to create sleep_time debug file\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+late_initcall(tk_debug_sleep_time_init);
+
+void tk_debug_account_sleep_time(struct timespec *t)
+{
+ sleep_time_bin[fls(t->tv_sec)]++;
+}
+
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
new file mode 100644
index 000000000000..13323ea08ffa
--- /dev/null
+++ b/kernel/time/timekeeping_internal.h
@@ -0,0 +1,14 @@
+#ifndef _TIMEKEEPING_INTERNAL_H
+#define _TIMEKEEPING_INTERNAL_H
+/*
+ * timekeeping debug functions
+ */
+#include <linux/time.h>
+
+#ifdef CONFIG_DEBUG_FS
+extern void tk_debug_account_sleep_time(struct timespec *t);
+#else
+#define tk_debug_account_sleep_time(x)
+#endif
+
+#endif /* _TIMEKEEPING_INTERNAL_H */