diff options
author | Linus Torvalds | 2011-10-26 17:15:03 +0200 |
---|---|---|
committer | Linus Torvalds | 2011-10-26 17:15:03 +0200 |
commit | 39adff5f69d6849ca22353a88058c9f8630528c0 (patch) | |
tree | b0c2d2de77ebc5c97fd19c29b81eeb03549553f8 /kernel/time | |
parent | 8a4a8918ed6e4a361f4df19f199bbc2d0a89a46c (diff) | |
parent | e35f95b36e43f67a6f806172555a152c11ea0a78 (diff) |
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
time, s390: Get rid of compile warning
dw_apb_timer: constify clocksource name
time: Cleanup old CONFIG_GENERIC_TIME references that snuck in
time: Change jiffies_to_clock_t() argument type to unsigned long
alarmtimers: Fix error handling
clocksource: Make watchdog reset lockless
posix-cpu-timers: Cure SMP accounting oddities
s390: Use direct ktime path for s390 clockevent device
clockevents: Add direct ktime programming function
clockevents: Make minimum delay adjustments configurable
nohz: Remove "Switched to NOHz mode" debugging messages
proc: Consider NO_HZ when printing idle and iowait times
nohz: Make idle/iowait counter update conditional
nohz: Fix update_ts_time_stat idle accounting
cputime: Clean up cputime_to_usecs and usecs_to_cputime macros
alarmtimers: Rework RTC device selection using class interface
alarmtimers: Add try_to_cancel functionality
alarmtimers: Add more refined alarm state tracking
alarmtimers: Remove period from alarm structure
alarmtimers: Remove interval cap limit hack
...
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/Kconfig | 2 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 266 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 129 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 38 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-internal.h | 2 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 77 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 55 |
9 files changed, 364 insertions, 213 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f06a8a365648..b26c2228fe92 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -27,3 +27,5 @@ config GENERIC_CLOCKEVENTS_BUILD default y depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR +config GENERIC_CLOCKEVENTS_MIN_ADJUST + bool diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ea5e1a928d5b..c436e790b21b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -53,27 +53,6 @@ static struct rtc_device *rtcdev; static DEFINE_SPINLOCK(rtcdev_lock); /** - * has_wakealarm - check rtc device has wakealarm ability - * @dev: current device - * @name_ptr: name to be returned - * - * This helper function checks to see if the rtc device can wake - * from suspend. - */ -static int has_wakealarm(struct device *dev, void *name_ptr) -{ - struct rtc_device *candidate = to_rtc_device(dev); - - if (!candidate->ops->set_alarm) - return 0; - if (!device_may_wakeup(candidate->dev.parent)) - return 0; - - *(const char **)name_ptr = dev_name(dev); - return 1; -} - -/** * alarmtimer_get_rtcdev - Return selected rtcdevice * * This function returns the rtc device to use for wakealarms. @@ -82,37 +61,64 @@ static int has_wakealarm(struct device *dev, void *name_ptr) */ static struct rtc_device *alarmtimer_get_rtcdev(void) { - struct device *dev; - char *str; unsigned long flags; struct rtc_device *ret; spin_lock_irqsave(&rtcdev_lock, flags); - if (!rtcdev) { - /* Find an rtc device and init the rtc_timer */ - dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); - /* If we have a device then str is valid. See has_wakealarm() */ - if (dev) { - rtcdev = rtc_class_open(str); - /* - * Drop the reference we got in class_find_device, - * rtc_open takes its own. - */ - put_device(dev); - rtc_timer_init(&rtctimer, NULL, NULL); - } - } ret = rtcdev; spin_unlock_irqrestore(&rtcdev_lock, flags); return ret; } + + +static int alarmtimer_rtc_add_device(struct device *dev, + struct class_interface *class_intf) +{ + unsigned long flags; + struct rtc_device *rtc = to_rtc_device(dev); + + if (rtcdev) + return -EBUSY; + + if (!rtc->ops->set_alarm) + return -1; + if (!device_may_wakeup(rtc->dev.parent)) + return -1; + + spin_lock_irqsave(&rtcdev_lock, flags); + if (!rtcdev) { + rtcdev = rtc; + /* hold a reference so it doesn't go away */ + get_device(dev); + } + spin_unlock_irqrestore(&rtcdev_lock, flags); + return 0; +} + +static struct class_interface alarmtimer_rtc_interface = { + .add_dev = &alarmtimer_rtc_add_device, +}; + +static int alarmtimer_rtc_interface_setup(void) +{ + alarmtimer_rtc_interface.class = rtc_class; + return class_interface_register(&alarmtimer_rtc_interface); +} +static void alarmtimer_rtc_interface_remove(void) +{ + class_interface_unregister(&alarmtimer_rtc_interface); +} #else -#define alarmtimer_get_rtcdev() (0) -#define rtcdev (0) +static inline struct rtc_device *alarmtimer_get_rtcdev(void) +{ + return NULL; +} +#define rtcdev (NULL) +static inline int alarmtimer_rtc_interface_setup(void) { return 0; } +static inline void alarmtimer_rtc_interface_remove(void) { } #endif - /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run @@ -126,6 +132,8 @@ static struct rtc_device *alarmtimer_get_rtcdev(void) static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) { timerqueue_add(&base->timerqueue, &alarm->node); + alarm->state |= ALARMTIMER_STATE_ENQUEUED; + if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { hrtimer_try_to_cancel(&base->timer); hrtimer_start(&base->timer, alarm->node.expires, @@ -147,7 +155,12 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) { struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); + if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) + return; + timerqueue_del(&base->timerqueue, &alarm->node); + alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; + if (next == &alarm->node) { hrtimer_try_to_cancel(&base->timer); next = timerqueue_getnext(&base->timerqueue); @@ -174,6 +187,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) unsigned long flags; ktime_t now; int ret = HRTIMER_NORESTART; + int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); now = base->gettime(); @@ -187,17 +201,19 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) alarm = container_of(next, struct alarm, node); timerqueue_del(&base->timerqueue, &alarm->node); - alarm->enabled = 0; - /* Re-add periodic timers */ - if (alarm->period.tv64) { - alarm->node.expires = ktime_add(expired, alarm->period); - timerqueue_add(&base->timerqueue, &alarm->node); - alarm->enabled = 1; - } + alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; + + alarm->state |= ALARMTIMER_STATE_CALLBACK; spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) - alarm->function(alarm); + restart = alarm->function(alarm, now); spin_lock_irqsave(&base->lock, flags); + alarm->state &= ~ALARMTIMER_STATE_CALLBACK; + + if (restart != ALARMTIMER_NORESTART) { + timerqueue_add(&base->timerqueue, &alarm->node); + alarm->state |= ALARMTIMER_STATE_ENQUEUED; + } } if (next) { @@ -234,7 +250,7 @@ static int alarmtimer_suspend(struct device *dev) freezer_delta = ktime_set(0, 0); spin_unlock_irqrestore(&freezer_delta_lock, flags); - rtc = rtcdev; + rtc = alarmtimer_get_rtcdev(); /* If we have no rtcdev, just return */ if (!rtc) return 0; @@ -299,53 +315,111 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) * @function: callback that is run when the alarm fires */ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, - void (*function)(struct alarm *)) + enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); - alarm->period = ktime_set(0, 0); alarm->function = function; alarm->type = type; - alarm->enabled = 0; + alarm->state = ALARMTIMER_STATE_INACTIVE; } /** * alarm_start - Sets an alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm - * @period: period at which the alarm will recur */ -void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) +void alarm_start(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; spin_lock_irqsave(&base->lock, flags); - if (alarm->enabled) + if (alarmtimer_active(alarm)) alarmtimer_remove(base, alarm); alarm->node.expires = start; - alarm->period = period; alarmtimer_enqueue(base, alarm); - alarm->enabled = 1; spin_unlock_irqrestore(&base->lock, flags); } /** - * alarm_cancel - Tries to cancel an alarm timer + * alarm_try_to_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled + * + * Returns 1 if the timer was canceled, 0 if it was not running, + * and -1 if the callback was running */ -void alarm_cancel(struct alarm *alarm) +int alarm_try_to_cancel(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - + int ret = -1; spin_lock_irqsave(&base->lock, flags); - if (alarm->enabled) + + if (alarmtimer_callback_running(alarm)) + goto out; + + if (alarmtimer_is_queued(alarm)) { alarmtimer_remove(base, alarm); - alarm->enabled = 0; + ret = 1; + } else + ret = 0; +out: spin_unlock_irqrestore(&base->lock, flags); + return ret; +} + + +/** + * alarm_cancel - Spins trying to cancel an alarm timer until it is done + * @alarm: ptr to alarm to be canceled + * + * Returns 1 if the timer was canceled, 0 if it was not active. + */ +int alarm_cancel(struct alarm *alarm) +{ + for (;;) { + int ret = alarm_try_to_cancel(alarm); + if (ret >= 0) + return ret; + cpu_relax(); + } +} + + +u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) +{ + u64 overrun = 1; + ktime_t delta; + + delta = ktime_sub(now, alarm->node.expires); + + if (delta.tv64 < 0) + return 0; + + if (unlikely(delta.tv64 >= interval.tv64)) { + s64 incr = ktime_to_ns(interval); + + overrun = ktime_divns(delta, incr); + + alarm->node.expires = ktime_add_ns(alarm->node.expires, + incr*overrun); + + if (alarm->node.expires.tv64 > now.tv64) + return overrun; + /* + * This (and the ktime_add() below) is the + * correction for exact: + */ + overrun++; + } + + alarm->node.expires = ktime_add(alarm->node.expires, interval); + return overrun; } + + /** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. @@ -365,12 +439,21 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) * * Posix timer callback for expired alarm timers. */ -static void alarm_handle_timer(struct alarm *alarm) +static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, + ktime_t now) { struct k_itimer *ptr = container_of(alarm, struct k_itimer, - it.alarmtimer); + it.alarm.alarmtimer); if (posix_timer_event(ptr, 0) != 0) ptr->it_overrun++; + + /* Re-add periodic timers */ + if (ptr->it.alarm.interval.tv64) { + ptr->it_overrun += alarm_forward(alarm, now, + ptr->it.alarm.interval); + return ALARMTIMER_RESTART; + } + return ALARMTIMER_NORESTART; } /** @@ -427,7 +510,7 @@ static int alarm_timer_create(struct k_itimer *new_timer) type = clock2alarm(new_timer->it_clock); base = &alarm_bases[type]; - alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); return 0; } @@ -444,9 +527,9 @@ static void alarm_timer_get(struct k_itimer *timr, memset(cur_setting, 0, sizeof(struct itimerspec)); cur_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); + ktime_to_timespec(timr->it.alarm.interval); cur_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires); return; } @@ -461,7 +544,9 @@ static int alarm_timer_del(struct k_itimer *timr) if (!rtcdev) return -ENOTSUPP; - alarm_cancel(&timr->it.alarmtimer); + if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) + return TIMER_RETRY; + return 0; } @@ -481,25 +566,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* - * XXX HACK! Currently we can DOS a system if the interval - * period on alarmtimers is too small. Cap the interval here - * to 100us and solve this properly in a future patch! -jstultz - */ - if ((new_setting->it_interval.tv_sec == 0) && - (new_setting->it_interval.tv_nsec < 100000)) - new_setting->it_interval.tv_nsec = 100000; - if (old_setting) alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ - alarm_cancel(&timr->it.alarmtimer); + if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) + return TIMER_RETRY; /* start the timer */ - alarm_start(&timr->it.alarmtimer, - timespec_to_ktime(new_setting->it_value), - timespec_to_ktime(new_setting->it_interval)); + timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); + alarm_start(&timr->it.alarm.alarmtimer, + timespec_to_ktime(new_setting->it_value)); return 0; } @@ -509,13 +586,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, * * Wakes up the task that set the alarmtimer */ -static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, + ktime_t now) { struct task_struct *task = (struct task_struct *)alarm->data; alarm->data = NULL; if (task) wake_up_process(task); + return ALARMTIMER_NORESTART; } /** @@ -530,7 +609,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) alarm->data = (void *)current; do { set_current_state(TASK_INTERRUPTIBLE); - alarm_start(alarm, absexp, ktime_set(0, 0)); + alarm_start(alarm, absexp); if (likely(alarm->data)) schedule(); @@ -691,6 +770,7 @@ static struct platform_driver alarmtimer_driver = { */ static int __init alarmtimer_init(void) { + struct platform_device *pdev; int error = 0; int i; struct k_clock alarm_clock = { @@ -719,10 +799,26 @@ static int __init alarmtimer_init(void) HRTIMER_MODE_ABS); alarm_bases[i].timer.function = alarmtimer_fired; } + + error = alarmtimer_rtc_interface_setup(); + if (error) + return error; + error = platform_driver_register(&alarmtimer_driver); - platform_device_register_simple("alarmtimer", -1, NULL, 0); + if (error) + goto out_if; + pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0); + if (IS_ERR(pdev)) { + error = PTR_ERR(pdev); + goto out_drv; + } + return 0; + +out_drv: + platform_driver_unregister(&alarmtimer_driver); +out_if: + alarmtimer_rtc_interface_remove(); return error; } device_initcall(alarmtimer_init); - diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index e4c699dfa4e8..1ecd6ba36d6c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,42 +94,143 @@ void clockevents_shutdown(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST + +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +/** + * clockevents_increase_min_delta - raise minimum delta of a clock event device + * @dev: device to increase the minimum delta + * + * Returns 0 on success, -ETIME when the minimum delta reached the limit. + */ +static int clockevents_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { + printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + +/** + * clockevents_program_min_delta - Set clock event device to the minimum delay. + * @dev: device to program + * + * Returns 0 on success, -ETIME when the retry loop failed. + */ +static int clockevents_program_min_delta(struct clock_event_device *dev) +{ + unsigned long long clc; + int64_t delta; + int i; + + for (i = 0;;) { + delta = dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); + + if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + return 0; + + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (dev->set_next_event((unsigned long) clc, dev) == 0) + return 0; + + if (++i > 2) { + /* + * We tried 3 times to program the device with the + * given min_delta_ns. Try to increase the minimum + * delta, if that fails as well get out of here. + */ + if (clockevents_increase_min_delta(dev)) + return -ETIME; + i = 0; + } + } +} + +#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ + +/** + * clockevents_program_min_delta - Set clock event device to the minimum delay. + * @dev: device to program + * + * Returns 0 on success, -ETIME when the retry loop failed. + */ +static int clockevents_program_min_delta(struct clock_event_device *dev) +{ + unsigned long long clc; + int64_t delta; + + delta = dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); + + if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + return 0; + + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + return dev->set_next_event((unsigned long) clc, dev); +} + +#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ + /** * clockevents_program_event - Reprogram the clock event device. + * @dev: device to program * @expires: absolute expiry time (monotonic clock) + * @force: program minimum delay if expires can not be set * * Returns 0 on success, -ETIME when the event is in the past. */ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, - ktime_t now) + bool force) { unsigned long long clc; int64_t delta; + int rc; if (unlikely(expires.tv64 < 0)) { WARN_ON_ONCE(1); return -ETIME; } - delta = ktime_to_ns(ktime_sub(expires, now)); - - if (delta <= 0) - return -ETIME; - dev->next_event = expires; if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) return 0; - if (delta > dev->max_delta_ns) - delta = dev->max_delta_ns; - if (delta < dev->min_delta_ns) - delta = dev->min_delta_ns; + /* Shortcut for clockevent devices that can deal with ktime. */ + if (dev->features & CLOCK_EVT_FEAT_KTIME) + return dev->set_next_ktime(expires, dev); + + delta = ktime_to_ns(ktime_sub(expires, ktime_get())); + if (delta <= 0) + return force ? clockevents_program_min_delta(dev) : -ETIME; - clc = delta * dev->mult; - clc >>= dev->shift; + delta = min(delta, (int64_t) dev->max_delta_ns); + delta = max(delta, (int64_t) dev->min_delta_ns); - return dev->set_next_event((unsigned long) clc, dev); + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + rc = dev->set_next_event((unsigned long) clc, dev); + + return (rc && force) ? clockevents_program_min_delta(dev) : rc; } /** @@ -258,7 +359,7 @@ int clockevents_update_freq(struct clock_event_device *dev, u32 freq) if (dev->mode != CLOCK_EVT_MODE_ONESHOT) return 0; - return clockevents_program_event(dev, dev->next_event, ktime_get()); + return clockevents_program_event(dev, dev->next_event, false); } /* diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0980f0d9a0a..cf52fda2e096 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -186,6 +186,7 @@ static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; +static atomic_t watchdog_reset_pending; static int clocksource_watchdog_kthread(void *data); static void __clocksource_change_rating(struct clocksource *cs, int rating); @@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data) struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; - int next_cpu; + int next_cpu, reset_pending; spin_lock(&watchdog_lock); if (!watchdog_running) goto out; + reset_pending = atomic_read(&watchdog_reset_pending); + list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data) local_irq_enable(); /* Clocksource initialized ? */ - if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { + if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || + atomic_read(&watchdog_reset_pending)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->wd_last = wdnow; cs->cs_last = csnow; @@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data) cs->cs_last = csnow; cs->wd_last = wdnow; + if (atomic_read(&watchdog_reset_pending)) + continue; + /* Check the deviation from the watchdog clocksource. */ - if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; } @@ -303,6 +310,13 @@ static void clocksource_watchdog(unsigned long data) } /* + * We only clear the watchdog_reset_pending, when we did a + * full cycle through all clocksources. + */ + if (reset_pending) + atomic_dec(&watchdog_reset_pending); + + /* * Cycle through CPUs to check if the CPUs stay synchronized * to each other. */ @@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_resume_watchdog(void) { - unsigned long flags; - - /* - * We use trylock here to avoid a potential dead lock when - * kgdb calls this code after the kernel has been stopped with - * watchdog_lock held. When watchdog_lock is held we just - * return and accept, that the watchdog might trigger and mark - * the monitored clock source (usually TSC) unstable. - * - * This does not affect the other caller clocksource_resume() - * because at this point the kernel is UP, interrupts are - * disabled and nothing can hold watchdog_lock. - */ - if (!spin_trylock_irqsave(&watchdog_lock, flags)) - return; - clocksource_reset_watchdog(); - spin_unlock_irqrestore(&watchdog_lock, flags); + atomic_inc(&watchdog_reset_pending); } static void clocksource_enqueue_watchdog(struct clocksource *cs) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c7218d132738..f954282d9a82 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -194,7 +194,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) for (next = dev->next_event; ;) { next = ktime_add(next, tick_period); - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; tick_do_periodic_broadcast(); } @@ -373,7 +373,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force) { struct clock_event_device *bc = tick_broadcast_device.evtdev; - return tick_dev_program_event(bc, expires, force); + return clockevents_program_event(bc, expires, force); } int tick_resume_broadcast_oneshot(struct clock_event_device *bc) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 119528de8235..da6c9ecad4e4 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -94,7 +94,7 @@ void tick_handle_periodic(struct clock_event_device *dev) */ next = ktime_add(dev->next_event, tick_period); for (;;) { - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; /* * Have to be careful here. If we're in oneshot mode, @@ -137,7 +137,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); for (;;) { - if (!clockevents_program_event(dev, next, ktime_get())) + if (!clockevents_program_event(dev, next, false)) return; next = ktime_add(next, tick_period); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 1009b06d6f89..4e265b901fed 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -26,8 +26,6 @@ extern void clockevents_shutdown(struct clock_event_device *dev); extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), ktime_t nextevt); -extern int tick_dev_program_event(struct clock_event_device *dev, - ktime_t expires, int force); extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 2d04411a5f05..824109060a33 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -21,74 +21,6 @@ #include "tick-internal.h" -/* Limit min_delta to a jiffie */ -#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) - -static int tick_increase_min_delta(struct clock_event_device *dev) -{ - /* Nothing to do if we already reached the limit */ - if (dev->min_delta_ns >= MIN_DELTA_LIMIT) - return -ETIME; - - if (dev->min_delta_ns < 5000) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - if (dev->min_delta_ns > MIN_DELTA_LIMIT) - dev->min_delta_ns = MIN_DELTA_LIMIT; - - printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns); - return 0; -} - -/** - * tick_program_event internal worker function - */ -int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, - int force) -{ - ktime_t now = ktime_get(); - int i; - - for (i = 0;;) { - int ret = clockevents_program_event(dev, expires, now); - - if (!ret || !force) - return ret; - - dev->retries++; - /* - * We tried 3 times to program the device with the given - * min_delta_ns. If that's not working then we increase it - * and emit a warning. - */ - if (++i > 2) { - /* Increase the min. delta and try again */ - if (tick_increase_min_delta(dev)) { - /* - * Get out of the loop if min_delta_ns - * hit the limit already. That's - * better than staying here forever. - * - * We clear next_event so we have a - * chance that the box survives. - */ - printk(KERN_WARNING - "CE: Reprogramming failure. Giving up\n"); - dev->next_event.tv64 = KTIME_MAX; - return -ETIME; - } - i = 0; - } - - now = ktime_get(); - expires = ktime_add_ns(now, dev->min_delta_ns); - } -} - /** * tick_program_event */ @@ -96,7 +28,7 @@ int tick_program_event(ktime_t expires, int force) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - return tick_dev_program_event(dev, expires, force); + return clockevents_program_event(dev, expires, force); } /** @@ -104,11 +36,10 @@ int tick_program_event(ktime_t expires, int force) */ void tick_resume_oneshot(void) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); - struct clock_event_device *dev = td->evtdev; + struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); - tick_program_event(ktime_get(), 1); + clockevents_program_event(dev, ktime_get(), true); } /** @@ -120,7 +51,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, { newdev->event_handler = handler; clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); - tick_dev_program_event(newdev, next_event, 1); + clockevents_program_event(newdev, next_event, true); } /** diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index eb98e55196b9..40420644d0ba 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,9 +158,10 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda if (ts->idle_active) { delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); + else + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; } @@ -196,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) /** * get_cpu_idle_time_us - get the total idle time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative idle time (since boot) for a given - * CPU, in microseconds. The idle time returned includes - * the iowait time (unlike what "top" and co report). + * CPU, in microseconds. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. @@ -210,20 +211,35 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, idle; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + idle = ts->idle_sleeptime; + } else { + if (ts->idle_active && !nr_iowait_cpu(cpu)) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + idle = ktime_add(ts->idle_sleeptime, delta); + } else { + idle = ts->idle_sleeptime; + } + } + + return ktime_to_us(idle); - return ktime_to_us(ts->idle_sleeptime); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); -/* +/** * get_cpu_iowait_time_us - get the total iowait time of a cpu * @cpu: CPU number to query - * @last_update_time: variable to store update time in + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. * * Return the cummulative iowait time (since boot) for a given * CPU, in microseconds. @@ -236,13 +252,26 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, iowait; if (!tick_nohz_enabled) return -1; - update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + iowait = ts->iowait_sleeptime; + } else { + if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); - return ktime_to_us(ts->iowait_sleeptime); + iowait = ktime_add(ts->iowait_sleeptime, delta); + } else { + iowait = ts->iowait_sleeptime; + } + } + + return ktime_to_us(iowait); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); @@ -634,8 +663,6 @@ static void tick_nohz_switch_to_nohz(void) next = ktime_add(next, tick_period); } local_irq_enable(); - - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -787,10 +814,8 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) { + if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); - } #endif } #endif /* HIGH_RES_TIMERS */ |