linux/include/clocksource/hyperv_timer.h
Naman Jain bcc80dec91 x86/hyperv: Fix hv tsc page based sched_clock for hibernation
read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is
bigger than the variable hv_sched_clock_offset, which is cached during
early boot, but depending on the timing this assumption may be false
when a hibernated VM starts again (the clock counter starts from 0
again) and is resuming back (Note: hv_init_tsc_clocksource() is not
called during hibernation/resume); consequently,
read_hv_sched_clock_tsc() may return a negative integer (which is
interpreted as a huge positive integer since the return type is u64)
and new kernel messages are prefixed with huge timestamps before
read_hv_sched_clock_tsc() grows big enough (which typically takes
several seconds).

Fix the issue by saving the Hyper-V clock counter just before the
suspend, and using it to correct the hv_sched_clock_offset in
resume. This makes hv tsc page based sched_clock continuous and ensures
that post resume, it starts from where it left off during suspend.
Override x86_platform.save_sched_clock_state and
x86_platform.restore_sched_clock_state routines to correct this as soon
as possible.

Note: if Invariant TSC is available, the issue doesn't happen because
1) we don't register read_hv_sched_clock_tsc() for sched clock:
See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework
clocksource and sched clock setup");
2) the common x86 code adjusts TSC similarly: see
__restore_processor_state() ->  tsc_verify_tsc_adjust(true) and
x86_platform.restore_sched_clock_state().

Cc: stable@vger.kernel.org
Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation")
Co-developed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20240917053917.76787-1-namjain@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20240917053917.76787-1-namjain@linux.microsoft.com>
2024-12-09 18:42:42 +00:00

118 lines
3.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Definitions for the clocksource provided by the Hyper-V
* hypervisor to guest VMs, as described in the Hyper-V Top
* Level Functional Spec (TLFS).
*
* Copyright (C) 2019, Microsoft, Inc.
*
* Author: Michael Kelley <mikelley@microsoft.com>
*/
#ifndef __CLKSOURCE_HYPERV_TIMER_H
#define __CLKSOURCE_HYPERV_TIMER_H
#include <linux/clocksource.h>
#include <linux/math64.h>
#include <asm/hyperv-tlfs.h>
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1
#ifdef CONFIG_HYPERV_TIMER
#include <asm/hyperv_timer.h>
/* Routines called by the VMbus driver */
extern int hv_stimer_alloc(bool have_percpu_irqs);
extern int hv_stimer_cleanup(unsigned int cpu);
extern void hv_stimer_legacy_init(unsigned int cpu, int sint);
extern void hv_stimer_legacy_cleanup(unsigned int cpu);
extern void hv_stimer_global_cleanup(void);
extern void hv_stimer0_isr(void);
extern void hv_init_clocksource(void);
extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
extern void hv_adj_sched_clock_offset(u64 offset);
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc, u64 *time)
{
u64 scale, offset;
u32 sequence;
/*
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
* Top-Level Functional Specification ver. 3.0 and above. To get the
* reference time we must do the following:
* - READ ReferenceTscSequence
* A special '0' value indicates the time source is unreliable and we
* need to use something else. The currently published specification
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
* instead of '0' as the special value, see commit c35b82ef0294.
* - ReferenceTime =
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
* - READ ReferenceTscSequence again. In case its value has changed
* since our first reading we need to discard ReferenceTime and repeat
* the whole sequence as the hypervisor was updating the page in
* between.
*/
do {
sequence = READ_ONCE(tsc_pg->tsc_sequence);
if (!sequence)
return false;
/*
* Make sure we read sequence before we read other values from
* TSC page.
*/
smp_rmb();
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
*cur_tsc = hv_get_raw_timer();
/*
* Make sure we read sequence after we read all other values
* from TSC page.
*/
smp_rmb();
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
*time = mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
return true;
}
#else /* CONFIG_HYPERV_TIMER */
static inline unsigned long hv_get_tsc_pfn(void)
{
return 0;
}
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time)
{
return false;
}
static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; }
static inline void hv_stimer_legacy_init(unsigned int cpu, int sint) {}
static inline void hv_stimer_legacy_cleanup(unsigned int cpu) {}
static inline void hv_stimer_global_cleanup(void) {}
static inline void hv_stimer0_isr(void) {}
#endif /* CONFIG_HYPERV_TIMER */
#endif