/*
 * Copyright (c) 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
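/*
 * System timer driver for the x86 local APIC in TSC-deadline mode:
 * timeouts are programmed as absolute TSC values via the
 * IA32_TSC_DEADLINE MSR, and the invariant TSC itself serves as the
 * kernel's cycle counter.
 */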
#include <drivers/timer/system_timer.h>
#include <sys_clock.h>
#include <spinlock.h>
#include <drivers/interrupt_controller/loapic.h>

#define IA32_TSC_DEADLINE_MSR 0x6e0
#define IA32_TSC_ADJUST_MSR 0x03b

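/* TSC cycles per kernel tick; with, e.g., a 2 GHz TSC and
 * CONFIG_SYS_CLOCK_TICKS_PER_SEC=10000 this works out to 200000.
 */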
#define CYC_PER_TICK (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC \
		      / (uint64_t) CONFIG_SYS_CLOCK_TICKS_PER_SEC)

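/* Layout of the local APIC LVT timer register: vector in bits 0-7,
 * mask in bit 16, and timer mode in bits 17-18 (00 = one-shot,
 * 01 = periodic, 10 = TSC-deadline).
 */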
struct apic_timer_lvt {
	uint8_t vector : 8;
	uint8_t unused0 : 8;
	uint8_t masked : 1;
	enum { ONE_SHOT, PERIODIC, TSC_DEADLINE } mode : 2;
	uint32_t unused2 : 13;
};

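/* last_announce is the TSC value at which ticks were last reported to
 * sys_clock_announce(); lvt_reg caches the CPU0 LVT timer setup so
 * smp_timer_init() can replay it on secondary CPUs.
 */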
static struct k_spinlock lock;
static uint64_t last_announce;
static union { uint32_t val; struct apic_timer_lvt lvt; } lvt_reg;

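/* Read the full 64-bit timestamp counter; RDTSC returns the low half
 * in EAX and the high half in EDX.
 */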
static ALWAYS_INLINE uint64_t rdtsc(void)
{
	uint32_t hi, lo;

	__asm__ volatile("rdtsc" : "=d"(hi), "=a"(lo));
	return lo + (((uint64_t)hi) << 32);
}

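/* Timer ISR: convert the TSC delta since the last announcement into
 * whole ticks, advance last_announce by exactly that many ticks (so
 * leftover cycles aren't lost), and report them to the kernel. In
 * ticked mode, immediately re-arm a one-tick timeout.
 */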
static void isr(const void *arg)
{
	ARG_UNUSED(arg);
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint32_t ticks = (rdtsc() - last_announce) / CYC_PER_TICK;

	last_announce += ticks * CYC_PER_TICK;
	k_spin_unlock(&lock, key);
	sys_clock_announce(ticks);

	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
		sys_clock_set_timeout(1, false);
	}
}

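/* Write a 64-bit value to the given MSR; WRMSR takes the MSR index in
 * ECX and the value in EDX:EAX.
 */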
static inline void wrmsr(int32_t msr, uint64_t val)
{
	uint32_t hi = (uint32_t) (val >> 32);
	uint32_t lo = (uint32_t) val;

	__asm__ volatile("wrmsr" :: "d"(hi), "a"(lo), "c"(msr));
}

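/* Arm the TSC_DEADLINE MSR for the requested number of ticks from
 * now, rounding the deadline up to the next tick boundary relative to
 * last_announce so that interrupts stay aligned to the tick grid.
 */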
void sys_clock_set_timeout(int32_t ticks, bool idle)
{
	ARG_UNUSED(idle);

	uint64_t now = rdtsc();
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint64_t expires = now + MAX(ticks - 1, 0) * CYC_PER_TICK;

	expires = last_announce + (((expires - last_announce + CYC_PER_TICK - 1)
				    / CYC_PER_TICK) * CYC_PER_TICK);

	/* The second condition is to catch the wraparound.
	 * Interpreted strictly, the IA SDM description of the
	 * TSC_DEADLINE MSR implies that it will trigger an immediate
	 * interrupt if we try to set an expiration across the 64-bit
	 * rollover. Unfortunately there's no way to test that, as on
	 * real hardware it requires more than a century of uptime,
	 * but this is cheap and safe.
	 */
	if (ticks == K_TICKS_FOREVER || expires < last_announce) {
		expires = UINT64_MAX;
	}

	wrmsr(IA32_TSC_DEADLINE_MSR, expires);
	k_spin_unlock(&lock, key);
}

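/* Whole ticks elapsed since the last call to sys_clock_announce(). */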
uint32_t sys_clock_elapsed(void)
{
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint32_t ret = (rdtsc() - last_announce) / CYC_PER_TICK;

	k_spin_unlock(&lock, key);
	return ret;
}

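/* The kernel's 32-bit cycle counter is just the truncated TSC. */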
uint32_t sys_clock_cycle_get_32(void)
{
	return (uint32_t) rdtsc();
}

static inline uint32_t timer_irq(void)
{
	/* The Zephyr APIC API is... idiosyncratic. The timer is a
	 * "local vector table" interrupt. These aren't system IRQs
	 * presented to the IO-APIC, they're indices into a register
	 * array in the local APIC. By Zephyr convention they come
	 * after all the external IO-APIC interrupts, but that number
	 * changes depending on device configuration so we have to
	 * fetch it at runtime. The timer happens to be the first
	 * entry in the table.
	 */
	return z_loapic_irq_base();
}

/* The TSC_ADJUST MSR implements a synchronized offset such that
 * multiple CPUs (within a socket, anyway) can synchronize exactly, or
 * implement managed timing spaces for guests in a recoverable way,
 * etc... We set it to zero on all cores for simplicity, because
 * firmware often leaves it in an inconsistent state between cores.
 */
static void clear_tsc_adjust(void)
{
	/* But don't touch it on ACRN, where a hypervisor bug
	 * confuses the APIC emulation and deadline interrupts don't
	 * arrive.
	 */
#ifndef CONFIG_BOARD_ACRN
	wrmsr(IA32_TSC_ADJUST_MSR, 0);
#endif
}

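/* Per-CPU timer setup, called on each secondary CPU as the SMP layer
 * brings it online.
 */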
void smp_timer_init(void)
{
	/* Copy the LVT configuration from CPU0, because IRQ_CONNECT()
	 * doesn't know how to manage LVT interrupts for anything
	 * other than the calling/initial CPU. Same fence needed to
	 * prevent later MSR writes from reordering before the APIC
	 * configuration write.
	 */
	x86_write_loapic(LOAPIC_TIMER, lvt_reg.val);
	__asm__ volatile("mfence" ::: "memory");
	clear_tsc_adjust();
	irq_enable(timer_irq());
}

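/* Execute CPUID for the leaf/subleaf passed in *eax/*ecx and store
 * the resulting EAX/EBX/ECX/EDX back through the pointers.
 */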
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	/* EAX and ECX are both read (leaf/subleaf) and written by
	 * CPUID, so they need "+" constraints; listing them only as
	 * inputs would let the compiler assume they survive the insn.
	 */
	__asm__ volatile("cpuid"
			 : "+a"(*eax), "=b"(*ebx), "+c"(*ecx), "=d"(*edx));
}

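/* One-time driver init on the boot CPU: verify the required CPU
 * features (under CONFIG_ASSERT), put the LVT timer into TSC-deadline
 * mode, and arm the first timeout when not tickless.
 */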
int sys_clock_driver_init(const struct device *dev)
{
	ARG_UNUSED(dev);

#ifdef CONFIG_ASSERT
	uint32_t eax, ebx, ecx, edx;

	eax = 1; ecx = 0; /* CPUID.01H:ECX[24] is TSC-deadline */
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((ecx & BIT(24)) != 0, "No TSC Deadline support");

	eax = 0x80000007; ecx = 0; /* CPUID.80000007H:EDX[8] is invariant TSC */
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((edx & BIT(8)) != 0, "No Invariant TSC support");

	eax = 7; ecx = 0; /* CPUID.07H(0):EBX[1] is TSC_ADJUST */
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((ebx & BIT(1)) != 0, "No TSC_ADJUST MSR support");
#endif

	clear_tsc_adjust();

	/* The timer interrupt number is fetched at runtime, so we
	 * can't use static IRQ_CONNECT()
	 */
	irq_connect_dynamic(timer_irq(), CONFIG_APIC_TIMER_IRQ_PRIORITY, isr, NULL, 0);

	lvt_reg.val = x86_read_loapic(LOAPIC_TIMER);
	lvt_reg.lvt.mode = TSC_DEADLINE;
	lvt_reg.lvt.masked = 0;
	x86_write_loapic(LOAPIC_TIMER, lvt_reg.val);

	/* Per the SDM, the TSC_DEADLINE MSR is not serializing, so
	 * this fence is needed to be sure that an upcoming MSR write
	 * (i.e. a timeout we're about to set) cannot possibly reorder
	 * around the initialization we just did.
	 */
	__asm__ volatile("mfence" ::: "memory");

	last_announce = rdtsc();
	irq_enable(timer_irq());

	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
		sys_clock_set_timeout(1, false);
	}

	return 0;
}