diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index abf31b78208e384caabc83d83b85aec419ba81a2..4bb33ff901e26908867456ebd990550949e65abe 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1343,6 +1343,8 @@ static int __init armv8_pmu_driver_init(void)
 }
 device_initcall(armv8_pmu_driver_init)
 
+extern struct clock_data cd;
+
 void arch_perf_update_userpage(struct perf_event *event,
 			       struct perf_event_mmap_page *userpg, u64 now)
 {
@@ -1357,21 +1359,21 @@ void arch_perf_update_userpage(struct perf_event *event,
 
 	do {
 		rd = sched_clock_read_begin(&seq);
 
-		if (rd->read_sched_clock != arch_timer_read_counter)
+		if (cd.read_sched_clock != arch_timer_read_counter)
 			return;
 
-		userpg->time_mult = rd->mult;
-		userpg->time_shift = rd->shift;
+		userpg->time_mult = cd.mult;
+		userpg->time_shift = cd.shift;
 		userpg->time_zero = rd->epoch_ns;
 		userpg->time_cycles = rd->epoch_cyc;
-		userpg->time_mask = rd->sched_clock_mask;
+		userpg->time_mask = cd.sched_clock_mask;
 
 		/*
 		 * Subtract the cycle base, such that software that
 		 * doesn't know about cap_user_time_short still 'works'
 		 * assuming no wraps.
 		 */
-		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+		ns = mul_u64_u32_shr(rd->epoch_cyc, cd.mult, cd.shift);
 		userpg->time_zero -= ns;
 	} while (sched_clock_read_retry(seq));
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 835ee87ed79228d846fdf9c20ff4aa993cb07a60..579b6d02c80cc5bf259c8b4659d1e9ce7c499154 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -11,11 +11,6 @@
  *
  * @epoch_ns:		sched_clock() value at last update
  * @epoch_cyc:		Clock cycle value at last update.
- * @sched_clock_mask:	Bitmask for two's complement subtraction of non 64bit
- *			clocks.
- * @read_sched_clock:	Current clock source (or dummy source when suspended).
- * @mult:		Multiplier for scaled math conversion.
- * @shift:		Shift value for scaled math conversion.
  *
  * Care must be taken when updating this structure; it is read by
  * some very hot code paths. It occupies <=40 bytes and, when combined
@@ -25,10 +20,47 @@
 struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates. The lowest
+ *			bit is the index for @read_data.
+ * @read_data:		Data required to read from sched_clock.
+ *
+ * @sched_clock_mask:	Bitmask for two's complement subtraction of non 64bit
+ *			clocks.
+ * @read_sched_clock:	Current clock source (or dummy source when suspended).
+ * @mult:		Multiplier for scaled math conversion.
+ * @shift:		Shift value for scaled math conversion.
+ *
+ * @wrap_kt:		Duration for which clock can run before wrapping.
+ * @rate:		Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+	seqcount_latch_t seq;
+	struct clock_read_data read_data[2];
+
+	struct {
+		char padding[0];
+	} ____cacheline_aligned;
+
 	u64 sched_clock_mask;
 	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
+
+	ktime_t wrap_kt;
+	unsigned long rate;
+
+	u64 (*actual_read_sched_clock)(void);
 };
 
 extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index b1b9b12899f5e43571e9cc39740f545cd6fad795..de50a321f835089b5c4da98bcca37c1b00fdb4be 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -19,30 +19,6 @@
 
 #include "timekeeping.h"
 
-/**
- * struct clock_data - all data needed for sched_clock() (including
- *                     registration of a new clock source)
- *
- * @seq:		Sequence counter for protecting updates. The lowest
- *			bit is the index for @read_data.
- * @read_data:		Data required to read from sched_clock.
- * @wrap_kt:		Duration for which clock can run before wrapping.
- * @rate:		Tick rate of the registered clock.
- * @actual_read_sched_clock: Registered hardware level clock read function.
- *
- * The ordering of this structure has been chosen to optimize cache
- * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
- * into a single 64-byte cache line.
- */
-struct clock_data {
-	seqcount_latch_t seq;
-	struct clock_read_data read_data[2];
-	ktime_t wrap_kt;
-	unsigned long rate;
-
-	u64 (*actual_read_sched_clock)(void);
-};
-
 static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
@@ -57,9 +33,9 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static struct clock_data cd ____cacheline_aligned = {
-	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
-			  .read_sched_clock = jiffy_sched_clock_read, },
+struct clock_data cd ____cacheline_aligned = {
+	.mult = NSEC_PER_SEC / HZ,
+	.read_sched_clock = jiffy_sched_clock_read,
 	.actual_read_sched_clock = jiffy_sched_clock_read,
 };
 
@@ -88,9 +64,9 @@ unsigned long long notrace sched_clock(void)
 	do {
 		rd = sched_clock_read_begin(&seq);
 
-		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
-		      rd->sched_clock_mask;
-		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
+		cyc = (cd.read_sched_clock() - rd->epoch_cyc) &
+		      cd.sched_clock_mask;
+		res = rd->epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
 	} while (sched_clock_read_retry(seq));
 
 	return res;
@@ -133,7 +109,7 @@ static void update_sched_clock(void)
 	rd = cd.read_data[0];
 
 	cyc = cd.actual_read_sched_clock();
-	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & cd.sched_clock_mask, cd.mult, cd.shift);
 
 	rd.epoch_ns = ns;
 	rd.epoch_cyc = cyc;
@@ -179,13 +155,14 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 	/* Update epoch for new counter and update 'epoch_ns' from old counter*/
 	new_epoch = read();
 	cyc = cd.actual_read_sched_clock();
-	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & cd.sched_clock_mask, cd.mult, cd.shift);
+
 	cd.actual_read_sched_clock = read;
+	cd.read_sched_clock = read;
+	cd.sched_clock_mask = new_mask;
+	cd.mult = new_mult;
+	cd.shift = new_shift;
 
-	rd.read_sched_clock = read;
-	rd.sched_clock_mask = new_mask;
-	rd.mult = new_mult;
-	rd.shift = new_shift;
 	rd.epoch_cyc = new_epoch;
 	rd.epoch_ns = ns;
 
@@ -265,11 +242,9 @@ static u64 notrace suspended_sched_clock_read(void)
 
 int sched_clock_suspend(void)
 {
-	struct clock_read_data *rd = &cd.read_data[0];
-
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	rd->read_sched_clock = suspended_sched_clock_read;
+	cd.read_sched_clock = suspended_sched_clock_read;
 
 	return 0;
 }
@@ -280,7 +255,7 @@ void sched_clock_resume(void)
 
 	rd->epoch_cyc = cd.actual_read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD);
-	rd->read_sched_clock = cd.actual_read_sched_clock;
+	cd.read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
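
For context, the mult/shift pair that this patch moves into struct clock_data (and that arch_perf_update_userpage() exports to userspace as time_mult/time_shift, alongside time_zero and time_mask) is used for the same scaled conversion everywhere: nanoseconds = (cycles * mult) >> shift, which is exactly what the kernel's cyc_to_ns() helper computes. Below is a minimal illustrative sketch of that arithmetic from a consumer's point of view; it is not part of the patch, and the helper name counter_to_ns is hypothetical. It assumes the counter has not wrapped and that (cyc * mult) does not overflow 64 bits, mirroring the "assuming no wraps" comment above.

#include <stdint.h>

/* Same scaled math as the kernel's cyc_to_ns(): ns = (cyc * mult) >> shift. */
static inline uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;
}

/*
 * Hypothetical consumer of the fields arch_perf_update_userpage() exports:
 * convert a raw counter sample to nanoseconds. time_zero already has the
 * epoch cycle contribution subtracted by the kernel, so adding the scaled
 * masked counter value back yields the sched_clock() timebase.
 */
static uint64_t counter_to_ns(uint64_t cyc, uint64_t time_zero,
			      uint64_t time_mask, uint32_t time_mult,
			      uint32_t time_shift)
{
	return time_zero + cyc_to_ns(cyc & time_mask, time_mult, time_shift);
}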