From 0b3db982904b771460a5717dfd5f0fb7bb580073 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 13 May 2025 20:34:30 +0800 Subject: [PATCH 1/2] watchdog/perf: Provide function for adjusting the event period driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC7CQP ---------------------------------------------------------------------- Architectures using perf events for hard lockup detection need to convert watchdog_thresh to the event's period. Some architectures, for example arm64, perform this conversion using the CPU's maximum frequency, which is obtained from cpufreq. However, by the time the lockup detector is initialized the cpufreq driver may not yet be initialized, so the watchdog is launched with an inaccurate period. Provide a function hardlockup_detector_perf_adjust_period() to allow adjusting the event period. The architecture can then update the event to a more accurate period once cpufreq is initialized. Signed-off-by: Yicong Yang Reviewed-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qinxin Xia --- include/linux/nmi.h | 2 ++ kernel/watchdog_perf.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 08422f713580b..25fe091e1d196 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_cleanup(void); +extern void hardlockup_detector_perf_adjust_period(int cpu, u64 period); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } +static inline void hardlockup_detector_perf_adjust_period(int cpu, u64 period) { } #endif void watchdog_hardlockup_stop(void); diff --git a/kernel/watchdog_perf.c 
b/kernel/watchdog_perf.c index 0052afe18b7fc..989d30f80951e 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -198,6 +198,29 @@ void hardlockup_detector_perf_cleanup(void) cpumask_clear(&dead_events_mask); } +/** + * hardlockup_detector_perf_adjust_period - Adjust the event period due + * to cpu frequency change + * @cpu: The CPU whose event period will be adjusted + * @period: The target period to be set + */ +void hardlockup_detector_perf_adjust_period(int cpu, u64 period) +{ + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) + return; + + if (!event) + return; + + if (event->attr.sample_period == period) + return; + + if (perf_event_period(event, period)) + pr_err("failed to change period to %llu\n", period); +} + /** * hardlockup_detector_perf_stop - Globally stop watchdog events * -- Gitee From 11a0bff3e57b90f82c2a215fff2e990e0c8516b7 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 13 May 2025 20:34:31 +0800 Subject: [PATCH 2/2] arm64/watchdog_hld: Add a cpufreq notifier for update watchdog thresh driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC7CQP ---------------------------------------------------------------------- arm64 depends on the cpufreq driver to obtain the maximum CPU frequency, which it uses to convert watchdog_thresh to the perf event period. cpufreq drivers like cppc_cpufreq are initialized late, after the hard lockup detector has been initialized, so the detector just uses a safe CPU frequency, which is inaccurate. Use a cpufreq notifier to adjust the event's period to a more accurate one. 
Signed-off-by: Yicong Yang Reviewed-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: Qinxin Xia --- arch/arm64/kernel/watchdog_hld.c | 58 ++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index 817f0b7f6f925..ae0da38214fa8 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -35,3 +35,61 @@ bool __init arch_perf_nmi_is_available(void) */ return arm_pmu_irq_is_nmi(); } + +static int watchdog_perf_update_period(void *data) +{ + int cpu = raw_smp_processor_id(); + u64 max_cpu_freq, new_period; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + return 0; + + new_period = watchdog_thresh * max_cpu_freq; + hardlockup_detector_perf_adjust_period(cpu, new_period); + + return 0; +} + +static int watchdog_freq_notifier_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = data; + int cpu; + + if (val != CPUFREQ_CREATE_POLICY) + return NOTIFY_DONE; + + /* + * Let each online CPU related to the policy update the period by their + * own. This will serialize with the framework on start/stop the lockup + * detector (softlockup_{start,stop}_all) and avoid potential race + * condition. 
Otherwise we may have below theoretical race condition: + * (core 0/1 share the same policy) + * [core 0] [core 1] + * hardlockup_detector_event_create() + * hw_nmi_get_sample_period() + * (cpufreq registered, notifier callback invoked) + * watchdog_freq_notifier_callback() + * watchdog_perf_update_period() + * (since core 1's event's not yet created, + * the period is not set) + * perf_event_create_kernel_counter() + * (event's period is SAFE_MAX_CPU_FREQ) + */ + for_each_cpu(cpu, policy->cpus) + smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false); + + return NOTIFY_DONE; +} + +static struct notifier_block watchdog_freq_notifier = { + .notifier_call = watchdog_freq_notifier_callback, +}; + +static int __init init_watchdog_freq_notifier(void) +{ + return cpufreq_register_notifier(&watchdog_freq_notifier, + CPUFREQ_POLICY_NOTIFIER); +} +core_initcall(init_watchdog_freq_notifier); -- Gitee