/*
 * Copyright (c) 2010-2014 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief IA-32 specific kernel interface header
 * This header contains the IA-32 specific kernel interface. It is included
 * by the generic kernel interface header (include/zephyr/arch/cpu.h).
 */

#ifndef ZEPHYR_INCLUDE_ARCH_X86_IA32_ARCH_H_
#define ZEPHYR_INCLUDE_ARCH_X86_IA32_ARCH_H_

#include "sys_io.h"
#include <stdbool.h>
#include <zephyr/kernel_structs.h>
#include <zephyr/arch/common/ffs.h>
#include <zephyr/sys/util.h>
#include <zephyr/arch/x86/ia32/gdbstub.h>
#include <zephyr/arch/x86/ia32/thread.h>
#include <zephyr/arch/x86/ia32/syscall.h>

#ifndef _ASMLANGUAGE
#include <stddef.h> /* for size_t */

#include <zephyr/arch/common/addr_types.h>
#include <zephyr/arch/x86/ia32/segmentation.h>
#include <zephyr/pm/pm.h>

#endif /* _ASMLANGUAGE */

/* GDT layout */
#define CODE_SEG 0x08
#define DATA_SEG 0x10
#define MAIN_TSS 0x18
#define DF_TSS 0x20

/*
 * Segment selector used for thread-local storage.
 * Keep these values in sync with gen_gdt.py.
 * The low two bits (0x03) set the selector's RPL to 3, limiting its
 * privilege.
 */
#if defined(CONFIG_USERSPACE)
#define GS_TLS_SEG (0x38 | 0x03)
#elif defined(CONFIG_HW_STACK_PROTECTION)
#define GS_TLS_SEG (0x28 | 0x03)
#else
#define GS_TLS_SEG (0x18 | 0x03)
#endif

/**
 * Macro used internally by NANO_CPU_INT_REGISTER and NANO_CPU_INT_REGISTER_ASM.
 * Not meant to be used explicitly by platform, driver or application code.
 */
#define MK_ISR_NAME(x) __isr__##x

#define Z_DYN_STUB_SIZE 4
#define Z_DYN_STUB_OFFSET 0
#define Z_DYN_STUB_LONG_JMP_EXTRA_SIZE 3
#define Z_DYN_STUB_PER_BLOCK 32


#ifndef _ASMLANGUAGE

#ifdef __cplusplus
extern "C" {
#endif

/* interrupt/exception/error related definitions */

typedef struct s_isrList {
	/** Address of ISR/stub */
	void *fnc;
	/** IRQ associated with the ISR/stub, or -1 if this is not
	 * associated with a real interrupt; in this case vec must
	 * not be -1
	 */
	unsigned int irq;
	/** Priority associated with the IRQ. Ignored if vec is not -1 */
	unsigned int priority;
	/** Vector number associated with ISR/stub, or -1 to assign based
	 * on priority
	 */
	unsigned int vec;
	/** Privilege level associated with ISR/stub */
	unsigned int dpl;

	/** If nonzero, specifies a TSS segment selector. Will configure
	 * a task gate instead of an interrupt gate. fnc parameter will be
	 * ignored
	 */
	unsigned int tss;
} ISR_LIST;

/**
 * @brief Connect a routine to an interrupt vector
 *
 * This macro "connects" the specified routine, @a r, to the specified interrupt
 * vector, @a v, using the descriptor privilege level @a d. On the IA-32
 * architecture, an interrupt vector is a value from 0 to 255. This macro
 * populates the special intList section with the address of the routine, the
 * vector number and the descriptor privilege level. The gen_idt tool then picks
 * up this information and generates an actual IDT entry with this information
 * properly encoded.
 *
 * The @a d argument specifies the privilege level for the interrupt-gate
 * descriptor; (hardware) interrupts and exceptions should specify a level of 0,
 * whereas handlers for user-mode software-generated interrupts should specify 3.
 *
 * @param r Routine to be connected
 * @param n IRQ number
 * @param p IRQ priority
 * @param v Interrupt Vector
 * @param d Descriptor Privilege Level
 */
#define NANO_CPU_INT_REGISTER(r, n, p, v, d) \
	static ISR_LIST __attribute__((section(".intList"))) \
			__attribute__((used)) MK_ISR_NAME(r) = \
	{ \
		.fnc = &(r), \
		.irq = (n), \
		.priority = (p), \
		.vec = (v), \
		.dpl = (d), \
		.tss = 0 \
	}
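
/* Illustrative usage (hypothetical handler and vector, shown here only as
 * a sketch; not taken from actual Zephyr code):
 *
 *	extern void my_spurious_handler(void);
 *	NANO_CPU_INT_REGISTER(my_spurious_handler, NANO_SOFT_IRQ, 0, 32, 0);
 *
 * This places an entry in .intList so that gen_idt installs the handler at
 * vector 32 with DPL 0.
 */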

/**
 * @brief Connect an IA hardware task to an interrupt vector
 *
 * This is very similar to NANO_CPU_INT_REGISTER, but instead of connecting
 * a handler function, the interrupt induces an IA hardware task switch to
 * another hardware task.
 *
 * @param tss_p GDT/LDT segment selector for the TSS representing the task
 * @param irq_p IRQ number
 * @param priority_p IRQ priority
 * @param vec_p Interrupt vector
 * @param dpl_p Descriptor privilege level
 */
#define _X86_IDT_TSS_REGISTER(tss_p, irq_p, priority_p, vec_p, dpl_p) \
	static ISR_LIST __attribute__((section(".intList"))) \
			__attribute__((used)) MK_ISR_NAME(vec_p) = \
	{ \
		.fnc = NULL, \
		.irq = (irq_p), \
		.priority = (priority_p), \
		.vec = (vec_p), \
		.dpl = (dpl_p), \
		.tss = (tss_p) \
	}
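
/* Illustrative usage (hypothetical; the actual registration lives in the
 * arch fatal-error code): route the double-fault exception (vector 8) to
 * the hardware task described by the DF_TSS selector defined above. The
 * IRQ argument is -1 because no IRQ line is involved.
 *
 *	_X86_IDT_TSS_REGISTER(DF_TSS, -1, -1, 8, 0);
 */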

/**
 * Code snippets for populating the vector ID and priority into the intList
 *
 * The 'magic' of static interrupts is accomplished by building up an array
 * 'intList' at compile time, and the gen_idt tool uses this to create the
 * actual IDT data structure.
 *
 * For controllers like APIC, the vectors in the IDT are not normally assigned
 * at build time; instead the sentinel value -1 is saved, and gen_idt figures
 * out the right vector to use based on our priority scheme. Groups of 16
 * vectors starting at 32 correspond to each priority level; see the worked
 * example below.
 *
 * These macros are only intended to be used by the IRQ_CONNECT() macro.
 */
#define _VECTOR_ARG(irq_p) (-1)
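
/* For example, under the scheme described above, an IRQ registered with
 * priority 2 would be assigned by gen_idt a free vector in the range
 * 32 + (2 * 16) to 32 + (2 * 16) + 15, i.e. 64-79.
 */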

#ifdef CONFIG_LINKER_USE_PINNED_SECTION
#define IRQSTUBS_TEXT_SECTION ".pinned_text.irqstubs"
#else
#define IRQSTUBS_TEXT_SECTION ".text.irqstubs"
#endif

/* Internally this macro does a few things:
 *
 * 1. There is a declaration of the interrupt parameters in the .intList
 * section, used by gen_idt to create the IDT. This does the same thing
 * as the NANO_CPU_INT_REGISTER() macro, but is done in assembly as we
 * need to populate the .fnc member with the address of the assembly
 * IRQ stub that we generate immediately afterwards.
 *
 * 2. The IRQ stub itself is declared. The code goes in its own named
 * section .text.irqstubs (which eventually gets linked into 'text')
 * and the stub is named (isr_name)_irq(irq_line)_stub.
 *
 * 3. The IRQ stub pushes the ISR routine and its argument onto the stack
 * and then jumps to the common interrupt handling code in _interrupt_enter().
 *
 * 4. z_irq_controller_irq_config() is called at runtime to set the mapping
 * between the vector and the IRQ line, as well as the triggering flags.
 */
#define ARCH_IRQ_CONNECT(irq_p, priority_p, isr_p, isr_param_p, flags_p) \
{ \
	__asm__ __volatile__( \
		".pushsection .intList\n\t" \
		".long %c[isr]_irq%c[irq]_stub\n\t" /* ISR_LIST.fnc */ \
		".long %c[irq]\n\t" /* ISR_LIST.irq */ \
		".long %c[priority]\n\t" /* ISR_LIST.priority */ \
		".long %c[vector]\n\t" /* ISR_LIST.vec */ \
		".long 0\n\t" /* ISR_LIST.dpl */ \
		".long 0\n\t" /* ISR_LIST.tss */ \
		".popsection\n\t" \
		".pushsection " IRQSTUBS_TEXT_SECTION "\n\t" \
		".global %c[isr]_irq%c[irq]_stub\n\t" \
		"%c[isr]_irq%c[irq]_stub:\n\t" \
		"pushl %[isr_param]\n\t" \
		"pushl %[isr]\n\t" \
		"jmp _interrupt_enter\n\t" \
		".popsection\n\t" \
		: \
		: [isr] "i" (isr_p), \
		  [isr_param] "i" (isr_param_p), \
		  [priority] "i" (priority_p), \
		  [vector] "i" _VECTOR_ARG(irq_p), \
		  [irq] "i" (irq_p)); \
	z_irq_controller_irq_config(Z_IRQ_TO_INTERRUPT_VECTOR(irq_p), (irq_p), \
				    (flags_p)); \
}
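
/* Illustrative usage (hypothetical IRQ line, priority and handler):
 * application and driver code normally reaches this macro through the
 * generic IRQ_CONNECT() API rather than invoking it directly.
 *
 *	void my_isr(const void *arg);
 *	...
 *	IRQ_CONNECT(11, 2, my_isr, NULL, 0);
 *	irq_enable(11);
 */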

#ifdef CONFIG_PCIE

#define ARCH_PCIE_IRQ_CONNECT(bdf_p, irq_p, priority_p, \
			      isr_p, isr_param_p, flags_p) \
	ARCH_IRQ_CONNECT(irq_p, priority_p, isr_p, isr_param_p, flags_p)

#endif /* CONFIG_PCIE */

/* Direct interrupts won't work as expected with KPTI turned on, because
 * all non-user accessible pages in the page table are marked non-present.
 * It's likely possible to add logic to ARCH_ISR_DIRECT_HEADER/FOOTER to do
 * the necessary trampolining to switch page tables / stacks, but this
 * probably loses all the latency benefits that direct interrupts provide
 * and one might as well use a regular interrupt anyway.
 */
#ifndef CONFIG_X86_KPTI
#define ARCH_IRQ_DIRECT_CONNECT(irq_p, priority_p, isr_p, flags_p) \
{ \
	NANO_CPU_INT_REGISTER(isr_p, irq_p, priority_p, -1, 0); \
	z_irq_controller_irq_config(Z_IRQ_TO_INTERRUPT_VECTOR(irq_p), (irq_p), \
				    (flags_p)); \
}

#ifdef CONFIG_PM
static inline void arch_irq_direct_pm(void)
{
	if (_kernel.idle) {
		_kernel.idle = 0;
		z_pm_save_idle_exit();
	}
}

#define ARCH_ISR_DIRECT_PM() arch_irq_direct_pm()
#else
#define ARCH_ISR_DIRECT_PM() do { } while (false)
#endif

#define ARCH_ISR_DIRECT_HEADER() arch_isr_direct_header()
#define ARCH_ISR_DIRECT_FOOTER(swap) arch_isr_direct_footer(swap)

/* FIXME:
 * tracing/tracing.h cannot be included here due to circular dependency
 */
#if defined(CONFIG_TRACING)
extern void sys_trace_isr_enter(void);
extern void sys_trace_isr_exit(void);
#endif

static inline void arch_isr_direct_header(void)
{
#if defined(CONFIG_TRACING)
	sys_trace_isr_enter();
#endif

	/* We're not going to unlock IRQs, but we still need to increment this
	 * so that arch_is_in_isr() works
	 */
	++_kernel.cpus[0].nested;
}

/*
 * FIXME: z_swap_irqlock is an inline function declared in a private header and
 * cannot be referenced from a public header, so we move it to an
 * external function.
 */
extern void arch_isr_direct_footer_swap(unsigned int key);

static inline void arch_isr_direct_footer(int swap)
{
	z_irq_controller_eoi();
#if defined(CONFIG_TRACING)
	sys_trace_isr_exit();
#endif
	--_kernel.cpus[0].nested;

	/* Call swap if all of the following are true:
	 *
	 * 1) The swap argument to this function is enabled
	 * 2) We are not in a nested interrupt
	 * 3) The next thread to run in the ready queue is not this thread
	 */
	if (swap != 0 && _kernel.cpus[0].nested == 0 &&
	    _kernel.ready_q.cache != _current) {
		unsigned int flags;

		/* Fetch EFLAGS argument to z_swap() */
		__asm__ volatile (
			"pushfl\n\t"
			"popl %0\n\t"
			: "=g" (flags)
			:
			: "memory"
			);

		arch_isr_direct_footer_swap(flags);
	}
}

#define ARCH_ISR_DIRECT_DECLARE(name) \
	static inline int name##_body(void); \
	__attribute__ ((interrupt)) void name(void *stack_frame) \
	{ \
		ARG_UNUSED(stack_frame); \
		int check_reschedule; \
		ISR_DIRECT_HEADER(); \
		check_reschedule = name##_body(); \
		ISR_DIRECT_FOOTER(check_reschedule); \
	} \
	static inline int name##_body(void)
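
/* Illustrative usage (hypothetical handler) via the generic
 * ISR_DIRECT_DECLARE() wrapper, which expands to the macro above.
 * Returning nonzero from the body makes the footer check whether a
 * reschedule is needed; returning 0 skips that check.
 *
 *	ISR_DIRECT_DECLARE(my_direct_isr)
 *	{
 *		do_work();	// hypothetical helper
 *		return 1;
 *	}
 */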
#endif /* !CONFIG_X86_KPTI */

/**
 * @brief Exception Stack Frame
 *
 * A pointer to an "exception stack frame" (ESF) is passed as an argument
 * to exception handlers registered via nanoCpuExcConnect(). As the system
 * always operates at ring 0, only the EIP, CS and EFLAGS registers are pushed
 * onto the stack when an exception occurs.
 *
 * The exception stack frame includes the volatile registers (EAX, ECX, and
 * EDX) as well as the 5 non-volatile registers (EDI, ESI, EBX, EBP and ESP).
 * Those registers are pushed onto the stack by _ExcEnt().
 */

typedef struct nanoEsf {
#ifdef CONFIG_GDBSTUB
	unsigned int ss;
	unsigned int gs;
	unsigned int fs;
	unsigned int es;
	unsigned int ds;
#endif
	unsigned int esp;
	unsigned int ebp;
	unsigned int ebx;
	unsigned int esi;
	unsigned int edi;
	unsigned int edx;
	unsigned int eax;
	unsigned int ecx;
	unsigned int errorCode;
	unsigned int eip;
	unsigned int cs;
	unsigned int eflags;
} z_arch_esf_t;

extern unsigned int z_x86_exception_vector;

struct _x86_syscall_stack_frame {
	uint32_t eip;
	uint32_t cs;
	uint32_t eflags;

	/* These are only present if cs = USER_CODE_SEG */
	uint32_t esp;
	uint32_t ss;
};

static ALWAYS_INLINE unsigned int arch_irq_lock(void)
{
	unsigned int key;

	__asm__ volatile ("pushfl; cli; popl %0" : "=g" (key) :: "memory");

	return key;
}
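
/* Typical usage sketch: the returned key is the saved EFLAGS image and is
 * meant to be handed back to the matching arch_irq_unlock() (declared
 * elsewhere in the x86 arch headers) to restore the previous interrupt
 * state:
 *
 *	unsigned int key = arch_irq_lock();
 *	// ...critical section...
 *	arch_irq_unlock(key);
 */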


/**
 * The NANO_SOFT_IRQ macro must be used as the value for the @a irq parameter
 * to NANO_CPU_INT_REGISTER when connecting to an interrupt that does not
 * correspond to any IRQ line (such as a spurious vector or a software IRQ).
 */
#define NANO_SOFT_IRQ ((unsigned int) (-1))

#ifdef CONFIG_X86_ENABLE_TSS
extern struct task_state_segment _main_tss;
#endif

#define ARCH_EXCEPT(reason_p) do { \
	__asm__ volatile( \
		"push %[reason]\n\t" \
		"int %[vector]\n\t" \
		: \
		: [vector] "i" (Z_X86_OOPS_VECTOR), \
		  [reason] "i" (reason_p)); \
	CODE_UNREACHABLE; /* LCOV_EXCL_LINE */ \
} while (false)
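
/* Illustrative usage sketch: the generic k_oops()/k_panic() machinery is
 * expected to land here with a reason code, e.g.:
 *
 *	ARCH_EXCEPT(K_ERR_KERNEL_OOPS);
 */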

/*
 * Dynamic thread object memory alignment.
 *
 * If support for SSEx extensions is enabled, a 16-byte boundary is required,
 * since the 'fxsave' and 'fxrstor' instructions require this. In all other
 * cases a 4-byte boundary is sufficient.
 */
#if defined(CONFIG_EAGER_FPU_SHARING) || defined(CONFIG_LAZY_FPU_SHARING)
#ifdef CONFIG_SSE
#define ARCH_DYNAMIC_OBJ_K_THREAD_ALIGNMENT 16
#else
#define ARCH_DYNAMIC_OBJ_K_THREAD_ALIGNMENT (sizeof(void *))
#endif
#else
/* No special alignment requirements, simply align on pointer size. */
#define ARCH_DYNAMIC_OBJ_K_THREAD_ALIGNMENT (sizeof(void *))
#endif /* CONFIG_*_FP_SHARING */


#ifdef __cplusplus
}
#endif

#endif /* !_ASMLANGUAGE */

#endif /* ZEPHYR_INCLUDE_ARCH_X86_IA32_ARCH_H_ */