[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 4/4] i386: Refactor int stacks to be per cpu for SMP
From: Damien Zammit
Subject: [PATCH 4/4] i386: Refactor int stacks to be per cpu for SMP
Date: Fri, 11 Nov 2022 23:21:38 +0000
---
i386/Makefrag.am | 6 +
i386/i386/cpu_number.h | 31 +++-
i386/i386/cswitch.S | 6 +-
i386/i386/locore.S | 31 ++--
i386/i386/mp_desc.c | 281 ++++++++++++++++++-------------
i386/i386/mp_desc.h | 9 +-
i386/i386/xen.h | 2 +-
i386/i386at/boothdr.S | 18 +-
i386/i386at/model_dep.c | 101 ++---------
i386/i386at/model_dep.h | 3 +-
i386/intel/pmap.c | 158 +++++++++++++----
i386/intel/pmap.h | 8 +-
linux/dev/arch/i386/kernel/irq.c | 13 +-
linux/dev/init/main.c | 2 +
14 files changed, 396 insertions(+), 273 deletions(-)
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 8d6ef8cd..b74aad35 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -30,6 +30,8 @@ if HOST_ix86
#
libkernel_a_SOURCES += \
+ i386/i386at/acpi_parse_apic.h \
+ i386/i386at/acpi_parse_apic.c \
i386/i386at/autoconf.c \
i386/i386at/autoconf.h \
i386/i386at/biosmem.c \
@@ -94,7 +96,9 @@ libkernel_a_SOURCES += \
i386/i386/ast_types.h \
i386/i386/cpu.h \
i386/i386/cpu_number.h \
+ i386/i386/cpu_number.c \
i386/i386/cswitch.S \
+ i386/i386/cpuboot.S \
i386/i386/db_disasm.c \
i386/i386/db_interface.c \
i386/i386/db_interface.h \
@@ -158,6 +162,8 @@ libkernel_a_SOURCES += \
i386/i386/user_ldt.h \
i386/i386/vm_param.h \
i386/i386/xpr.h \
+ i386/i386/smp.h \
+ i386/i386/smp.c \
i386/intel/pmap.c \
i386/intel/pmap.h \
i386/intel/read_fault.c \
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 9aef6370..d56cb602 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -35,14 +35,35 @@
/* More-specific code must define cpu_number() and CPU_NUMBER. */
#ifdef __i386__
#define CX(addr, reg) addr(,reg,4)
+
+/* CPU_NUMBER(%ebx) will _not_ work! */
+#define CPU_NUMBER(reg) \
+ pushfl ;\
+ cli ;\
+ pushl %esi ;\
+ pushl %edi ;\
+ pushl %ebx ;\
+ pushl %eax ;\
+ call EXT(cpu_number) ;\
+ movl %eax, %ebx ;\
+ popl %eax ;\
+ movl %ebx, reg ;\
+ popl %ebx ;\
+ popl %edi ;\
+ popl %esi ;\
+ popfl
+
#endif
#ifdef __x86_64__
#define CX(addr, reg) addr(,reg,8)
+#warning Missing CPU_NUMBER() for 64 bit
+#define CPU_NUMBER(reg)
#endif
-/* XXX For now */
-#define CPU_NUMBER(reg) movl $0,reg
-#define cpu_number() 0
+#ifndef __ASSEMBLER__
+#include "kern/cpu_number.h"
+int cpu_number();
+#endif
#else /* NCPUS == 1 */
@@ -51,8 +72,4 @@
#endif /* NCPUS == 1 */
-#ifndef __ASSEMBLER__
-#include "kern/cpu_number.h"
-#endif
-
#endif /* _I386_CPU_NUMBER_H_ */
diff --git a/i386/i386/cswitch.S b/i386/i386/cswitch.S
index 718c8aac..ae941bdd 100644
--- a/i386/i386/cswitch.S
+++ b/i386/i386/cswitch.S
@@ -110,7 +110,7 @@ ENTRY(Thread_continue)
*/
ENTRY(switch_to_shutdown_context)
CPU_NUMBER(%edx)
- movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack
*/
+ movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack
*/
movl %ebx,KSS_EBX(%ecx) /* save registers */
movl %ebp,KSS_EBP(%ecx)
movl %edi,KSS_EDI(%ecx)
@@ -124,8 +124,8 @@ ENTRY(switch_to_shutdown_context)
movl 4(%esp),%ebx /* get routine to run next */
movl 8(%esp),%esi /* get its argument */
- movl EXT(interrupt_stack)(,%edx,4),%ecx /* point to its
interrupt stack */
- lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */
+ movl CX(EXT(int_stack_base),%edx),%ecx /* point to its
interrupt stack */
+ lea -4+INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */
pushl %eax /* push thread */
call EXT(thread_dispatch) /* reschedule thread */
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index b5122613..fb92b6e7 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -541,13 +541,15 @@ _kret_iret:
trap_from_kernel:
#if MACH_KDB || MACH_TTD
movl %esp,%ebx /* save current stack */
-
movl %esp,%edx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%edx
- cmpl EXT(int_stack_base),%edx
+
+ xorl %ecx,%ecx
+ and $(~(NCPUS*INTSTACK_SIZE-1)),%edx
+ cmpl CX(EXT(int_stack_base),%ecx),%edx
je 1f /* OK if so */
- CPU_NUMBER(%edx) /* get CPU number */
+ CPU_NUMBER(%edx)
+
cmpl CX(EXT(kernel_stack),%edx),%esp
/* already on kernel stack? */
ja 0f
@@ -668,9 +670,10 @@ ENTRY(all_intrs)
pushl %edx
cld /* clear direction flag */
+ xorl %ecx,%ecx
movl %esp,%edx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%edx
- cmpl %ss:EXT(int_stack_base),%edx
+ and $(~(NCPUS*INTSTACK_SIZE-1)),%edx
+ cmpl %ss:CX(EXT(int_stack_base),%ecx),%edx
je int_from_intstack /* if not: */
pushl %ds /* save segment registers */
@@ -686,6 +689,7 @@ ENTRY(all_intrs)
CPU_NUMBER(%edx)
movl CX(EXT(int_stack_top),%edx),%ecx
+
xchgl %ecx,%esp /* switch to interrupt stack */
#if STAT_TIME
@@ -724,19 +728,20 @@ LEXT(return_to_iret) /* ( label for
kdb_kintr and hardclock) */
pop %fs
pop %es
pop %ds
- pop %edx
- pop %ecx
- pop %eax
+ popl %edx
+ popl %ecx
+ popl %eax
iret /* return to caller */
int_from_intstack:
- cmpl EXT(int_stack_base),%esp /* seemingly looping? */
+ CPU_NUMBER(%edx)
+ cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? */
jb stack_overflowed /* if not: */
call EXT(interrupt) /* call interrupt routine */
_return_to_iret_i: /* ( label for kdb_kintr) */
- pop %edx /* must have been on kernel segs */
- pop %ecx
- pop %eax /* no ASTs */
+ popl %edx /* must have been on kernel segs */
+ popl %ecx
+ popl %eax /* no ASTs */
iret
stack_overflowed:
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index 1e9ea0fc..c6a55d90 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -24,25 +24,36 @@
* the rights to redistribute these changes.
*/
-#if NCPUS > 1
-
-#include <string.h>
-
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/printf.h>
+#include <kern/smp.h>
+#include <kern/startup.h>
+#include <kern/kmutex.h>
#include <mach/machine.h>
#include <mach/xen.h>
#include <vm/vm_kern.h>
#include <i386/mp_desc.h>
#include <i386/lock.h>
+#include <i386/apic.h>
+#include <i386/locore.h>
+#include <i386/gdt.h>
+#include <i386at/idt.h>
+#include <i386at/int_init.h>
+#include <i386/cpu.h>
+#include <i386/smp.h>
+
#include <i386at/model_dep.h>
#include <machine/ktss.h>
+#include <machine/smp.h>
#include <machine/tss.h>
#include <machine/io_perm.h>
#include <machine/vm_param.h>
+#include <i386at/acpi_parse_apic.h>
+#include <string.h>
+
/*
* The i386 needs an interrupt stack to keep the PCB stack from being
* overrun by interrupts. All interrupt stacks MUST lie at lower addresses
@@ -52,20 +63,35 @@
/*
* Addresses of bottom and top of interrupt stacks.
*/
-vm_offset_t interrupt_stack[NCPUS];
vm_offset_t int_stack_top[NCPUS];
vm_offset_t int_stack_base[NCPUS];
-/*
- * Barrier address.
- */
-vm_offset_t int_stack_high;
+/* Interrupt stack allocation */
+uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(NCPUS*INTSTACK_SIZE);
+
+void
+interrupt_stack_alloc(void)
+{
+ int i;
+
+ /*
+ * Set up pointers to the top of the interrupt stack.
+ */
+ for (i = 0; i < NCPUS; i++) {
+ int_stack_base[i] = (vm_offset_t) &solid_intstack[i *
INTSTACK_SIZE];
+ int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) *
INTSTACK_SIZE] - 4;
+ }
+}
+
+#if NCPUS > 1
/*
- * First cpu`s interrupt stack.
+ * Flag to mark SMP init by BSP complete
*/
-extern char _intstack[]; /* bottom */
-extern char _eintstack[]; /* top */
+int bspdone;
+
+extern void *apboot, *apbootend;
+extern volatile ApicLocalUnit* lapic;
/*
* Multiprocessor i386/i486 systems use a separate copy of the
@@ -77,7 +103,7 @@ extern char _eintstack[]; /* top */
*/
/*
- * Allocated descriptor tables.
+ * Descriptor tables.
*/
struct mp_desc_table *mp_desc_table[NCPUS] = { 0 };
@@ -102,12 +128,13 @@ extern struct real_descriptor ldt[LDTSZ];
* Allocate and initialize the per-processor descriptor tables.
*/
-struct mp_desc_table *
+int
mp_desc_init(int mycpu)
{
struct mp_desc_table *mpt;
+ vm_offset_t mem;
- if (mycpu == master_cpu) {
+ if (mycpu == 0) {
/*
* Master CPU uses the tables built at boot time.
* Just set the TSS and GDT pointers.
@@ -118,110 +145,28 @@ mp_desc_init(int mycpu)
}
else {
/*
- * Other CPUs allocate the table from the bottom of
- * the interrupt stack.
+ * Allocate tables for other CPUs
*/
- mpt = (struct mp_desc_table *) interrupt_stack[mycpu];
+ if (!init_alloc_aligned(sizeof(struct mp_desc_table), &mem))
+ panic("not enough memory for descriptor tables");
+ mpt = (struct mp_desc_table *)phystokv(mem);
mp_desc_table[mycpu] = mpt;
mp_ktss[mycpu] = &mpt->ktss;
mp_gdt[mycpu] = mpt->gdt;
/*
- * Copy the tables
+ * Zero the tables
*/
- memcpy(mpt->idt,
- idt,
- sizeof(idt));
- memcpy(mpt->gdt,
- gdt,
- sizeof(gdt));
- memcpy(mpt->ldt,
- ldt,
- sizeof(ldt));
- memset(&mpt->ktss, 0,
- sizeof(struct task_tss));
+ memset(mpt->idt, 0, sizeof(idt));
+ memset(mpt->gdt, 0, sizeof(gdt));
+ memset(mpt->ldt, 0, sizeof(ldt));
+ memset(&mpt->ktss, 0, sizeof(struct task_tss));
- /*
- * Fix up the entries in the GDT to point to
- * this LDT and this TSS.
- */
-#ifdef MACH_RING1
- panic("TODO %s:%d\n",__FILE__,__LINE__);
-#else /* MACH_RING1 */
- _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_LDT,
- (unsigned)&mpt->ldt,
- LDTSZ * sizeof(struct real_descriptor) - 1,
- ACC_P|ACC_PL_K|ACC_LDT, 0);
- _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_TSS,
- (unsigned)&mpt->ktss,
- sizeof(struct task_tss) - 1,
- ACC_P|ACC_PL_K|ACC_TSS, 0);
-
- mpt->ktss.tss.ss0 = KERNEL_DS;
- mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL;
- mpt->ktss.barrier = 0xFF;
-#endif /* MACH_RING1 */
-
- return mpt;
+ return mycpu;
}
}
-kern_return_t intel_startCPU(int slot_num)
-{
- printf("TODO: intel_startCPU\n");
-}
-
-/*
- * Called after all CPUs have been found, but before the VM system
- * is running. The machine array must show which CPUs exist.
- */
-void
-interrupt_stack_alloc(void)
-{
- int i;
- int cpu_count;
- vm_offset_t stack_start;
-
- /*
- * Count the number of CPUs.
- */
- cpu_count = 0;
- for (i = 0; i < NCPUS; i++)
- if (machine_slot[i].is_cpu)
- cpu_count++;
-
- /*
- * Allocate an interrupt stack for each CPU except for
- * the master CPU (which uses the bootstrap stack)
- */
- if (!init_alloc_aligned(INTSTACK_SIZE*(cpu_count-1), &stack_start))
- panic("not enough memory for interrupt stacks");
- stack_start = phystokv(stack_start);
-
- /*
- * Set up pointers to the top of the interrupt stack.
- */
- for (i = 0; i < NCPUS; i++) {
- if (i == master_cpu) {
- interrupt_stack[i] = (vm_offset_t) _intstack;
- int_stack_top[i] = (vm_offset_t) _eintstack;
- }
- else if (machine_slot[i].is_cpu) {
- interrupt_stack[i] = stack_start;
- int_stack_top[i] = stack_start + INTSTACK_SIZE;
-
- stack_start += INTSTACK_SIZE;
- }
- }
-
- /*
- * Set up the barrier address. All thread stacks MUST
- * be above this address.
- */
- int_stack_high = stack_start;
-}
-
/* XXX should be adjusted per CPU speed */
int simple_lock_pause_loop = 100;
@@ -255,24 +200,130 @@ void
interrupt_processor(int cpu)
{
printf("interrupt cpu %d\n",cpu);
+ smp_pmap_update(apic_get_cpu_apic_id(cpu));
+}
+
+void
+cpu_setup()
+{
+ unsigned apic_id = (((ApicLocalUnit*)phystokv(lapic_addr))->apic_id.r >>
24) & 0xff;
+ uint16_t cpu = apic_get_cpu_kernel_id(apic_id);
+
+ printf("AP=(%u) before\n", cpu);
+
+ pmap_bootstrap(cpu);
+ printf("AP=(%u) pmap done\n", cpu);
+
+ pmap_make_temporary_mapping(cpu);
+ printf("AP=(%u) tempmap done\n", cpu);
+
+#ifndef MACH_HYP
+ /* Turn paging on.
+ * TODO: Why does setting the WP bit here cause a crash?
+ */
+ set_cr0(get_cr0() | CR0_PG /* | CR0_WP */);
+ set_cr0(get_cr0() & ~(CR0_CD | CR0_NW));
+ if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
+ set_cr4(get_cr4() | CR4_PGE);
+#endif /* MACH_HYP */
+ flush_instr_queue();
+ printf("AP=(%u) paging done\n", cpu);
+
+ mp_desc_init(cpu);
+ printf("AP=(%u) mpdesc done\n", cpu);
+
+ ap_gdt_init(cpu);
+ printf("AP=(%u) gdt done\n", cpu);
+
+ ap_idt_init(cpu);
+ printf("AP=(%u) idt done\n", cpu);
+
+ ap_int_init(cpu);
+ printf("AP=(%u) int done\n", cpu);
+
+ ap_ldt_init(cpu);
+ printf("AP=(%u) ldt done\n", cpu);
+
+ ap_ktss_init(cpu);
+ printf("AP=(%u) ktss done\n", cpu);
+
+ pmap_remove_temporary_mapping(cpu);
+ printf("AP=(%u) remove tempmap done\n", cpu);
+
+ pmap_set_page_dir(0);
+ flush_tlb();
+ printf("AP=(%u) reset page dir done\n", cpu);
+
+ /* Initialize machine_slot fields with the cpu data */
+ machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386;
+ machine_slot[cpu].cpu_type = machine_slot[0].cpu_type;
+
+ lapic_enable();
+ asm("sti");
+
+ slave_main();
+}
+
+void
+cpu_ap_main()
+{
+ do {
+ asm volatile ("pause" : : : "memory");
+ } while (!bspdone);
+
+ cpu_setup();
}
kern_return_t
cpu_start(int cpu)
{
- if (machine_slot[cpu].running)
- return KERN_FAILURE;
+ assert(machine_slot[cpu].running != TRUE);
+
+ uint16_t apic_id = apic_get_cpu_apic_id(cpu);
+
+ printf("Trying to enable: %d\n", apic_id);
+
+ smp_startup_cpu(apic_id, AP_BOOT_ADDR);
+
+ printf("Started cpu %d (lapic id %04x)\n", cpu, apic_id);
- return intel_startCPU(cpu);
+ return KERN_SUCCESS;
}
void
start_other_cpus(void)
{
- int cpu;
- for (cpu = 0; cpu < NCPUS; cpu++)
- if (cpu != cpu_number())
- cpu_start(cpu);
-}
+ unsigned long flags;
+
+ cpu_intr_save(&flags);
+
+ int ncpus = smp_get_numcpus();
+
+ //Copy cpu initialization assembly routine
+ memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot,
+ (uint32_t)&apbootend - (uint32_t)&apboot);
+
+#ifndef APIC
+ lapic_enable(); /* Enable lapic only once */
+#endif
+ unsigned cpu;
+ bspdone = 0;
+ for (cpu = 1; cpu < ncpus; cpu++) {
+ machine_slot[cpu].running = FALSE;
+ //Start cpu
+ printf("Starting AP %d\n", cpu);
+ cpu_start(cpu);
+ }
+ printf("BSP: Completed SMP init\n");
+ bspdone = 1;
+
+ for (cpu = 1; cpu < ncpus; cpu++) {
+ do {
+ asm volatile ("pause" : : : "memory");
+ } while (machine_slot[cpu].running == FALSE);
+ }
+
+ cpu_intr_restore(flags);
+}
#endif /* NCPUS > 1 */
diff --git a/i386/i386/mp_desc.h b/i386/i386/mp_desc.h
index ebe1471d..59d50e77 100644
--- a/i386/i386/mp_desc.h
+++ b/i386/i386/mp_desc.h
@@ -27,6 +27,8 @@
#ifndef _I386_MP_DESC_H_
#define _I386_MP_DESC_H_
+#include <mach/kern_return.h>
+
#if MULTIPROCESSOR
/*
@@ -44,6 +46,8 @@
#include "gdt.h"
#include "ldt.h"
+#define AP_BOOT_ADDR 0x7000
+
/*
* The descriptor tables are together in a structure
* allocated one per processor (except for the boot processor).
@@ -70,11 +74,12 @@ extern struct task_tss *mp_ktss[NCPUS];
*/
extern struct real_descriptor *mp_gdt[NCPUS];
+extern uint8_t solid_intstack[];
/*
* Each CPU calls this routine to set up its descriptor tables.
*/
-extern struct mp_desc_table * mp_desc_init(int);
+extern int mp_desc_init(int);
extern void interrupt_processor(int cpu);
@@ -88,4 +93,6 @@ extern kern_return_t cpu_start(int cpu);
extern kern_return_t cpu_control(int cpu, const int *info, unsigned int count);
+extern void interrupt_stack_alloc(void);
+
#endif /* _I386_MP_DESC_H_ */
diff --git a/i386/i386/xen.h b/i386/i386/xen.h
index 8a17748a..dc8ca928 100644
--- a/i386/i386/xen.h
+++ b/i386/i386/xen.h
@@ -180,7 +180,7 @@ MACH_INLINE int hyp_mmu_update_pte(pt_entry_t pte,
pt_entry_t val)
#define HYP_BATCH_MMU_UPDATES 256
#define hyp_mmu_update_la(la, val) hyp_mmu_update_pte( \
- (kernel_page_dir[lin2pdenum_cont((vm_offset_t)(la))] & INTEL_PTE_PFN) \
+ (ap_page_dir[0][lin2pdenum_cont((vm_offset_t)(la))] & INTEL_PTE_PFN) \
+ ptenum((vm_offset_t)(la)) * sizeof(pt_entry_t), val)
#endif
diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
index a4830326..79d186eb 100644
--- a/i386/i386at/boothdr.S
+++ b/i386/i386at/boothdr.S
@@ -1,6 +1,6 @@
#include <mach/machine/asm.h>
-
+#include <i386/apic.h>
#include <i386/i386asm.h>
/*
@@ -54,7 +54,18 @@ boot_entry:
movw %ax,%ss
/* Switch to our own interrupt stack. */
- movl $_intstack+INTSTACK_SIZE,%esp
+ movl $solid_intstack+INTSTACK_SIZE-4, %esp
+ andl $0xfffffff0,%esp
+
+ /* Enable local apic */
+ xorl %eax, %eax
+ xorl %edx, %edx
+ movl $APIC_MSR, %ecx
+ rdmsr
+ orl $APIC_MSR_ENABLE, %eax
+ orl $APIC_MSR_BSP, %eax
+ movl $APIC_MSR, %ecx
+ wrmsr
/* Reset EFLAGS to a known state. */
pushl $0
@@ -91,9 +102,6 @@ iplt_done:
/* Jump into C code. */
call EXT(c_boot_entry)
- .comm _intstack,INTSTACK_SIZE
- .comm _eintstack,0
-
.align 16
.word 0
boot_gdt_descr:
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index 1819526b..ad1128ca 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -134,11 +134,9 @@ extern char version[];
/* If set, reboot the system on ctrl-alt-delete. */
boolean_t rebootflag = FALSE; /* exported to kdintr */
-/* Interrupt stack. */
-static char int_stack[KERNEL_STACK_SIZE] __aligned(KERNEL_STACK_SIZE);
-#if NCPUS <= 1
-vm_offset_t int_stack_top[1], int_stack_base[1];
-#endif
+/* Interrupt stacks */
+extern vm_offset_t int_stack_top[], int_stack_base[];
+extern uint8_t solid_intstack[]; /* bottom */
#ifdef LINUX_DEV
extern void linux_init(void);
@@ -171,15 +169,20 @@ void machine_init(void)
hyp_init();
#else /* MACH_HYP */
+#if (NCPUS > 1)
+ acpi_apic_init();
+#endif
#if defined(APIC)
- if (acpi_apic_init() != ACPI_SUCCESS) {
- panic("APIC not found, unable to boot");
- }
ioapic_configure();
lapic_enable_timer();
+#else
+ startrtclock();
+#endif
#if (NCPUS > 1)
smp_init();
+#endif
+#if defined(APIC)
#warning FIXME: Rather unmask them from their respective drivers
/* kd */
unmask_irq(1);
@@ -187,8 +190,7 @@ void machine_init(void)
unmask_irq(4);
/* com1 */
unmask_irq(3);
-#endif /* NCPUS > 1 */
-#endif /* APIC */
+#endif
#ifdef LINUX_DEV
/*
@@ -364,8 +366,6 @@ register_boot_data(const struct multiboot_raw_info *mbi)
void
i386at_init(void)
{
- /* XXX move to intel/pmap.h */
- extern pt_entry_t *kernel_page_dir;
int i;
/*
@@ -444,7 +444,7 @@ i386at_init(void)
* Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
* XXX make the BIOS page (page 0) read-only.
*/
- pmap_bootstrap();
+ pmap_bootstrap(0);
/*
* Load physical segments into the VM system.
@@ -453,47 +453,8 @@ i386at_init(void)
*/
biosmem_setup();
- /*
- * We'll have to temporarily install a direct mapping
- * between physical memory and low linear memory,
- * until we start using our new kernel segment descriptors.
- */
-#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
- vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS -
LINEAR_MIN_KERNEL_ADDRESS;
- if ((vm_offset_t)(-delta) < delta)
- delta = (vm_offset_t)(-delta);
- int nb_direct = delta >> PDESHIFT;
- for (i = 0; i < nb_direct; i++)
- kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) +
i] =
-
kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
-#endif
- /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
-#if VM_MIN_KERNEL_ADDRESS != 0
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS)] =
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
-#endif
+ pmap_make_temporary_mapping(0);
-#ifdef MACH_PV_PAGETABLES
- for (i = 0; i < PDPNUM; i++)
- pmap_set_page_readonly_init((void*) kernel_page_dir + i *
INTEL_PGBYTES);
-#if PAE
- pmap_set_page_readonly_init(kernel_pmap->pdpbase);
-#endif /* PAE */
-#endif /* MACH_PV_PAGETABLES */
-#if PAE
-#ifdef __x86_64__
- set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base));
-#else
- set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase));
-#endif
-#ifndef MACH_HYP
- if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
- panic("CPU doesn't have support for PAE.");
- set_cr4(get_cr4() | CR4_PAE);
-#endif /* MACH_HYP */
-#else
- set_cr3((unsigned long)_kvtophys(kernel_page_dir));
-#endif /* PAE */
#ifndef MACH_HYP
/* Turn paging on.
* Also set the WP bit so that on 486 or better processors
@@ -525,40 +486,13 @@ i386at_init(void)
mp_desc_init(0);
#endif // NCPUS
-#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
- /* Get rid of the temporary direct mapping and flush it out of the TLB.
*/
- for (i = 0 ; i < nb_direct; i++) {
-#ifdef MACH_XEN
-#ifdef MACH_PSEUDO_PHYS
- if
(!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS)
+ i]), 0))
-#else /* MACH_PSEUDO_PHYS */
- if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i *
INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
-#endif /* MACH_PSEUDO_PHYS */
- printf("couldn't unmap frame %d\n", i);
-#else /* MACH_XEN */
- kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) +
i] = 0;
-#endif /* MACH_XEN */
- }
-#endif
- /* Keep BIOS memory mapped */
-#if VM_MIN_KERNEL_ADDRESS != 0
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS)] =
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
-#endif
-
- /* Not used after boot, better give it back. */
-#ifdef MACH_XEN
- hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
-#endif /* MACH_XEN */
-
- flush_tlb();
+ pmap_remove_temporary_mapping(0);
#ifdef MACH_XEN
hyp_p2m_init();
#endif /* MACH_XEN */
- int_stack_base[0] = (vm_offset_t)&int_stack;
- int_stack_top[0] = int_stack_base[0] + KERNEL_STACK_SIZE - 4;
+ interrupt_stack_alloc();
}
/*
@@ -650,7 +584,6 @@ void c_boot_entry(vm_offset_t bi)
#endif /* MACH_KDB */
machine_slot[0].is_cpu = TRUE;
- machine_slot[0].running = TRUE;
machine_slot[0].cpu_subtype = CPU_SUBTYPE_AT386;
switch (cpu_type)
@@ -698,6 +631,8 @@ startrtclock(void)
{
#ifndef APIC
clkstart();
+ asm ("sti");
+ unmask_irq(0);
#endif
}
diff --git a/i386/i386at/model_dep.h b/i386/i386at/model_dep.h
index a972695f..f72ddc3b 100644
--- a/i386/i386at/model_dep.h
+++ b/i386/i386at/model_dep.h
@@ -28,10 +28,9 @@
extern vm_offset_t int_stack_top[NCPUS], int_stack_base[NCPUS];
/* Check whether P points to the interrupt stack. */
-#define ON_INT_STACK(P) (((P) & ~(KERNEL_STACK_SIZE-1)) ==
int_stack_base[0])
+#define ON_INT_STACK(P) (((P) & ~(NCPUS*INTSTACK_SIZE-1)) ==
int_stack_base[0])
extern vm_offset_t timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot);
-
void inittodr(void);
boolean_t init_alloc_aligned(vm_size_t size, vm_offset_t *addrp);
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index 0f2ad641..490f8459 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -101,6 +101,8 @@
* Private data structures.
*/
+pt_entry_t *ap_page_dir[NCPUS] = { 0 };
+
/*
* For each vm_page_t, there is a list of all currently
* valid virtual mappings of that page. An entry is
@@ -394,6 +396,8 @@ boolean_t cpu_update_needed[NCPUS];
#define current_pmap() (vm_map_pmap(current_thread()->task->map))
#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
+struct pmap tmp_pmap_store[NCPUS];
+pmap_t tmp_pmap[NCPUS];
struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
@@ -416,12 +420,6 @@ int ptes_per_vm_page; /* number of
hardware ptes needed
unsigned int inuse_ptepages_count = 0; /* debugging */
-/*
- * Pointer to the basic page directory for the kernel.
- * Initialized by pmap_bootstrap().
- */
-pt_entry_t *kernel_page_dir;
-
/*
* Two slots for temporary physical page mapping, to allow for
* physical-to-physical transfers.
@@ -592,12 +590,13 @@ vm_offset_t pmap_map_bd(
* and direct-map all physical memory.
* Called with mapping off.
*/
-void pmap_bootstrap(void)
+void pmap_bootstrap(int cpu)
{
/*
* Mapping is turned off; we must reference only physical addresses.
* The load image of the system is to be mapped 1-1 physical = virtual.
*/
+ pmap_t mykernel_pmap;
/*
* Set ptes_per_vm_page for general use.
@@ -605,22 +604,24 @@ void pmap_bootstrap(void)
#if 0
ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;
#endif
-
- /*
- * The kernel's pmap is statically allocated so we don't
- * have to use pmap_create, which is unlikely to work
- * correctly at this part of the boot sequence.
- */
-
- kernel_pmap = &kernel_pmap_store;
+ if (cpu != 0) {
+ mykernel_pmap = tmp_pmap[cpu] = &tmp_pmap_store[cpu];
+ } else {
+ /*
+ * The kernel's pmap is statically allocated so we don't
+ * have to use pmap_create, which is unlikely to work
+ * correctly at this part of the boot sequence.
+ */
+ mykernel_pmap = kernel_pmap = tmp_pmap[0] = &kernel_pmap_store;
+ }
#if NCPUS > 1
lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */
#endif /* NCPUS > 1 */
- simple_lock_init(&kernel_pmap->lock);
+ simple_lock_init(&mykernel_pmap->lock);
- kernel_pmap->ref_count = 1;
+ mykernel_pmap->ref_count = 1;
/*
* Determine the kernel virtual address range.
@@ -644,15 +645,15 @@ void pmap_bootstrap(void)
{
vm_offset_t addr;
init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr);
- kernel_page_dir = (pt_entry_t*)phystokv(addr);
+ ap_page_dir[cpu] = (pt_entry_t*)phystokv(addr);
}
- kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
- memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES);
+ mykernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(mykernel_pmap->pdpbase, 0, INTEL_PGBYTES);
{
int i;
for (i = 0; i < PDPNUM; i++)
- WRITE_PTE(&kernel_pmap->pdpbase[i],
- pa_to_pte(_kvtophys((void *) kernel_page_dir
+ WRITE_PTE(&mykernel_pmap->pdpbase[i],
+ pa_to_pte(_kvtophys((void *) ap_page_dir[cpu]
+ i * INTEL_PGBYTES))
| INTEL_PTE_VALID
#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES)
@@ -662,23 +663,23 @@ void pmap_bootstrap(void)
}
#ifdef __x86_64__
#ifdef MACH_HYP
- kernel_pmap->user_l4base = NULL;
- kernel_pmap->user_pdpbase = NULL;
+ mykernel_pmap->user_l4base = NULL;
+ mykernel_pmap->user_pdpbase = NULL;
#endif
- kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page());
- memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
- WRITE_PTE(&kernel_pmap->l4base[0],
pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ mykernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(mykernel_pmap->l4base, 0, INTEL_PGBYTES);
+ WRITE_PTE(&mykernel_pmap->l4base[0],
pa_to_pte(_kvtophys(mykernel_pmap->pdpbase)) | INTEL_PTE_VALID |
INTEL_PTE_WRITE);
#ifdef MACH_PV_PAGETABLES
- pmap_set_page_readonly_init(kernel_pmap->l4base);
+ pmap_set_page_readonly_init(mykernel_pmap->l4base);
#endif
#endif /* x86_64 */
#else /* PAE */
- kernel_pmap->dirbase = kernel_page_dir =
(pt_entry_t*)phystokv(pmap_grab_page());
+ mykernel_pmap->dirbase = ap_page_dir[cpu] =
(pt_entry_t*)phystokv(pmap_grab_page());
#endif /* PAE */
{
unsigned i;
for (i = 0; i < NPDES; i++)
- kernel_page_dir[i] = 0;
+ ap_page_dir[cpu][i] = 0;
}
#ifdef MACH_PV_PAGETABLES
@@ -754,7 +755,7 @@ void pmap_bootstrap(void)
*/
for (va = phystokv(0); va >= phystokv(0) && va <
kernel_virtual_end; )
{
- pt_entry_t *pde = kernel_page_dir +
lin2pdenum_cont(kvtolin(va));
+ pt_entry_t *pde = ap_page_dir[cpu] +
lin2pdenum_cont(kvtolin(va));
pt_entry_t *ptable =
(pt_entry_t*)phystokv(pmap_grab_page());
pt_entry_t *pte;
@@ -1261,7 +1262,7 @@ pmap_t pmap_create(vm_size_t size)
return PMAP_NULL;
}
memcpy(page_dir[i],
- (void *) kernel_page_dir + i * INTEL_PGBYTES,
+ (void *) ap_page_dir[0] + i * INTEL_PGBYTES,
INTEL_PGBYTES);
}
@@ -3030,3 +3031,96 @@ pmap_unmap_page_zero (void)
#endif /* MACH_PV_PAGETABLES */
}
#endif /* __i386__ */
+
+void
+pmap_make_temporary_mapping(int cpu)
+{
+ int i;
+
+ /*
+ * We'll have to temporarily install a direct mapping
+ * between physical memory and low linear memory,
+ * until we start using our new kernel segment descriptors.
+ */
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS -
LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ for (i = 0; i < nb_direct; i++)
+ ap_page_dir[cpu][lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) +
i] =
+
ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
+#endif
+ /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS)] =
+ ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+
+#ifdef MACH_PV_PAGETABLES
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readonly_init((void*) ap_page_dir[cpu] + i *
INTEL_PGBYTES);
+#if PAE
+ pmap_set_page_readonly_init(tmp_pmap[cpu]->pdpbase);
+#endif /* PAE */
+#endif /* MACH_PV_PAGETABLES */
+
+ pmap_set_page_dir(cpu);
+}
+
+void
+pmap_set_page_dir(int cpu)
+{
+#if PAE
+#ifdef __x86_64__
+ set_cr3((unsigned long)_kvtophys(tmp_pmap[cpu]->l4base));
+#else
+ set_cr3((unsigned long)_kvtophys(tmp_pmap[cpu]->pdpbase));
+#endif
+#ifndef MACH_HYP
+ if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
+ panic("CPU doesn't have support for PAE.");
+ set_cr4(get_cr4() | CR4_PAE);
+#endif /* MACH_HYP */
+#else
+ set_cr3((unsigned long)_kvtophys(ap_page_dir[cpu]));
+#endif /* PAE */
+}
+
+void
+pmap_remove_temporary_mapping(int cpu)
+{
+ int i;
+
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS -
LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ /* Get rid of the temporary direct mapping and flush it out of the TLB.
*/
+ for (i = 0 ; i < nb_direct; i++) {
+#ifdef MACH_XEN
+#ifdef MACH_PSEUDO_PHYS
+ if
(!hyp_mmu_update_pte(kv_to_ma(&ap_page_dir[cpu][lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS)
+ i]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i *
INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame %d\n", i);
+#else /* MACH_XEN */
+ ap_page_dir[cpu][lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) +
i] = 0;
+#endif /* MACH_XEN */
+ }
+#endif
+ /* Keep BIOS memory mapped */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS -
VM_MIN_KERNEL_ADDRESS)] =
+ ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+
+ /* Not used after boot, better give it back. */
+#ifdef MACH_XEN
+ hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
+#endif /* MACH_XEN */
+
+ flush_tlb();
+}
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index bad640c1..2ec40268 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -474,13 +474,19 @@ pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
#define pmap_attribute(pmap,addr,size,attr,value) \
(KERN_INVALID_ADDRESS)
+extern pt_entry_t *ap_page_dir[NCPUS];
+
/*
* Bootstrap the system enough to run with virtual memory.
* Allocate the kernel page directory and page tables,
* and direct-map all physical memory.
* Called with mapping off.
*/
-extern void pmap_bootstrap(void);
+extern void pmap_bootstrap(int cpu);
+
+extern void pmap_set_page_dir(int cpu);
+extern void pmap_make_temporary_mapping(int cpu);
+extern void pmap_remove_temporary_mapping(int cpu);
extern void pmap_unmap_page_zero (void);
diff --git a/linux/dev/arch/i386/kernel/irq.c b/linux/dev/arch/i386/kernel/irq.c
index 67feea84..6f99003e 100644
--- a/linux/dev/arch/i386/kernel/irq.c
+++ b/linux/dev/arch/i386/kernel/irq.c
@@ -31,6 +31,7 @@
#include <i386/spl.h>
#include <i386/irq.h>
#include <i386/pit.h>
+#include <i386/model_dep.h>
#define MACH_INCLUDE
#include <linux/mm.h>
@@ -421,7 +422,7 @@ reserve_mach_irqs (void)
{
unsigned int i;
- for (i = 0; i < NINTR; i++)
+ for (i = 1; i < NINTR; i++)
{
if (ivect[i] != intnull)
/* This dummy action does not specify SA_SHIRQ, so
@@ -707,7 +708,6 @@ void
init_IRQ (void)
{
char *p;
- int latch = (CLKNUM + hz / 2) / hz;
/*
* Ensure interrupts are disabled.
@@ -715,19 +715,12 @@ init_IRQ (void)
(void) splhigh ();
#ifndef APIC
- /*
- * Program counter 0 of 8253 to interrupt hz times per second.
- */
- outb_p (PIT_C0 | PIT_SQUAREMODE | PIT_READMODE, PITCTL_PORT);
- outb_p (latch & 0xff, PITCTR0_PORT);
- outb (latch >> 8, PITCTR0_PORT);
-#endif
-
/*
* Install our clock interrupt handler.
*/
old_clock_handler = ivect[0];
ivect[0] = linux_timer_intr;
+#endif
reserve_mach_irqs ();
diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c
index 6d853957..207724f3 100644
--- a/linux/dev/init/main.c
+++ b/linux/dev/init/main.c
@@ -160,7 +160,9 @@ linux_init (void)
pcmcia_init ();
#endif
+#ifndef APIC
restore_IRQ ();
+#endif
linux_auto_config = 0;
}
--
2.34.1
[PATCH 3/4] Add cpu_number and cpuboot, Damien Zammit, 2022/11/11
[PATCH 4/4] i386: Refactor int stacks to be per cpu for SMP,
Damien Zammit <=