[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3] i386: Refactor int stacks to be per cpu for SMP
From: |
Samuel Thibault |
Subject: |
Re: [PATCH v3] i386: Refactor int stacks to be per cpu for SMP |
Date: |
Mon, 13 Feb 2023 01:31:04 +0100 |
User-agent: |
NeoMutt/20170609 (1.8.3) |
Applied and fixed for PAE, thanks!
Damien Zammit, le sam. 04 févr. 2023 01:15:06 +0000, a ecrit:
> This also serialises the AP bringup, so paging can be enabled per cpu
> one by one.
>
> Also-by: Almudena Garcia <liberamenso10000@gmail.com>
> ---
> i386/i386/mp_desc.c | 230 ++++++++++++++++++++++++++++------------
> i386/i386/mp_desc.h | 7 +-
> i386/i386at/boothdr.S | 18 +++-
> i386/i386at/ioapic.c | 5 +-
> i386/i386at/model_dep.c | 33 +++---
> 5 files changed, 201 insertions(+), 92 deletions(-)
>
> diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
> index bcf2fbe7..03b2ea68 100644
> --- a/i386/i386/mp_desc.c
> +++ b/i386/i386/mp_desc.c
> @@ -24,25 +24,36 @@
> * the rights to redistribute these changes.
> */
>
> -#if NCPUS > 1
> -
> -#include <string.h>
> -
> #include <kern/cpu_number.h>
> #include <kern/debug.h>
> #include <kern/printf.h>
> +#include <kern/smp.h>
> +#include <kern/startup.h>
> +#include <kern/kmutex.h>
> #include <mach/machine.h>
> #include <mach/xen.h>
> #include <vm/vm_kern.h>
>
> #include <i386/mp_desc.h>
> #include <i386/lock.h>
> +#include <i386/apic.h>
> +#include <i386/locore.h>
> +#include <i386/gdt.h>
> +#include <i386at/idt.h>
> +#include <i386at/int_init.h>
> +#include <i386/cpu.h>
> +#include <i386/smp.h>
> +
> #include <i386at/model_dep.h>
> #include <machine/ktss.h>
> +#include <machine/smp.h>
> #include <machine/tss.h>
> #include <machine/io_perm.h>
> #include <machine/vm_param.h>
>
> +#include <i386at/acpi_parse_apic.h>
> +#include <string.h>
> +
> /*
> * The i386 needs an interrupt stack to keep the PCB stack from being
> * overrun by interrupts. All interrupt stacks MUST lie at lower addresses
> @@ -52,20 +63,35 @@
> /*
> * Addresses of bottom and top of interrupt stacks.
> */
> -vm_offset_t interrupt_stack[NCPUS];
> vm_offset_t int_stack_top[NCPUS];
> vm_offset_t int_stack_base[NCPUS];
>
> -/*
> - * Barrier address.
> - */
> -vm_offset_t int_stack_high;
> +/* Interrupt stack allocation */
> +uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(NCPUS*INTSTACK_SIZE);
> +
> +void
> +interrupt_stack_alloc(void)
> +{
> + int i;
> +
> + /*
> + * Set up pointers to the top of the interrupt stack.
> + */
> +
> + for (i = 0; i < NCPUS; i++) {
> + int_stack_base[i] = (vm_offset_t) &solid_intstack[i *
> INTSTACK_SIZE];
> + int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) *
> INTSTACK_SIZE] - 4;
> + }
> +}
>
> +#if NCPUS > 1
> /*
> - * First cpu`s interrupt stack.
> + * Flag to mark SMP init by BSP complete
> */
> -extern char _intstack[]; /* bottom */
> -extern char _eintstack[]; /* top */
> +int bspdone;
> +
> +extern void *apboot, *apbootend;
> +extern volatile ApicLocalUnit* lapic;
>
> /*
> * Multiprocessor i386/i486 systems use a separate copy of the
> @@ -77,7 +103,7 @@ extern char _eintstack[]; /* top */
> */
>
> /*
> - * Allocated descriptor tables.
> + * Descriptor tables.
> */
> struct mp_desc_table *mp_desc_table[NCPUS] = { 0 };
>
> @@ -102,12 +128,13 @@ extern struct real_descriptor ldt[LDTSZ];
> * Allocate and initialize the per-processor descriptor tables.
> */
>
> -struct mp_desc_table *
> +int
> mp_desc_init(int mycpu)
> {
> struct mp_desc_table *mpt;
> + vm_offset_t mem;
>
> - if (mycpu == master_cpu) {
> + if (mycpu == 0) {
> /*
> * Master CPU uses the tables built at boot time.
> * Just set the TSS and GDT pointers.
> @@ -118,61 +145,28 @@ mp_desc_init(int mycpu)
> }
> else {
> /*
> - * Other CPUs allocate the table from the bottom of
> - * the interrupt stack.
> + * Allocate tables for other CPUs
> */
> - mpt = (struct mp_desc_table *) interrupt_stack[mycpu];
> + if (!init_alloc_aligned(sizeof(struct mp_desc_table), &mem))
> + panic("not enough memory for descriptor tables");
> + mpt = (struct mp_desc_table *)phystokv(mem);
>
> mp_desc_table[mycpu] = mpt;
> mp_ktss[mycpu] = &mpt->ktss;
> mp_gdt[mycpu] = mpt->gdt;
>
> /*
> - * Copy the tables
> + * Zero the tables
> */
> - memcpy(mpt->idt,
> - idt,
> - sizeof(idt));
> - memcpy(mpt->gdt,
> - gdt,
> - sizeof(gdt));
> - memcpy(mpt->ldt,
> - ldt,
> - sizeof(ldt));
> - memset(&mpt->ktss, 0,
> - sizeof(struct task_tss));
> + memset(mpt->idt, 0, sizeof(idt));
> + memset(mpt->gdt, 0, sizeof(gdt));
> + memset(mpt->ldt, 0, sizeof(ldt));
> + memset(&mpt->ktss, 0, sizeof(struct task_tss));
>
> - /*
> - * Fix up the entries in the GDT to point to
> - * this LDT and this TSS.
> - */
> -#ifdef MACH_RING1
> - panic("TODO %s:%d\n",__FILE__,__LINE__);
> -#else /* MACH_RING1 */
> - _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_LDT,
> - (unsigned)&mpt->ldt,
> - LDTSZ * sizeof(struct real_descriptor) - 1,
> - ACC_P|ACC_PL_K|ACC_LDT, 0);
> - _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_TSS,
> - (unsigned)&mpt->ktss,
> - sizeof(struct task_tss) - 1,
> - ACC_P|ACC_PL_K|ACC_TSS, 0);
> -
> - mpt->ktss.tss.ss0 = KERNEL_DS;
> - mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL;
> - mpt->ktss.barrier = 0xFF;
> -#endif /* MACH_RING1 */
> -
> - return mpt;
> + return mycpu;
> }
> }
>
> -static kern_return_t intel_startCPU(int slot_num)
> -{
> - printf("TODO: intel_startCPU\n");
> - return KERN_FAILURE;
> -}
> -
> /* XXX should be adjusted per CPU speed */
> int simple_lock_pause_loop = 100;
>
> @@ -206,24 +200,128 @@ void
> interrupt_processor(int cpu)
> {
> printf("interrupt cpu %d\n",cpu);
> + smp_pmap_update(apic_get_cpu_apic_id(cpu));
> +}
> +
> +static void
> +paging_enable(void)
> +{
> +#ifndef MACH_HYP
> + /* Turn paging on.
> + * TODO: Why does setting the WP bit here cause a crash?
> + */
> + set_cr0(get_cr0() | CR0_PG /* | CR0_WP */);
> + set_cr0(get_cr0() & ~(CR0_CD | CR0_NW));
> + if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
> + set_cr4(get_cr4() | CR4_PGE);
> +#endif /* MACH_HYP */
> +}
> +
> +void
> +cpu_setup(int cpu)
> +{
> + pmap_make_temporary_mapping();
> + printf("AP=(%u) tempmap done\n", cpu);
> +
> + paging_enable();
> + flush_instr_queue();
> + printf("AP=(%u) paging done\n", cpu);
> +
> + mp_desc_init(cpu);
> + printf("AP=(%u) mpdesc done\n", cpu);
> +
> + ap_gdt_init(cpu);
> + printf("AP=(%u) gdt done\n", cpu);
> +
> + ap_idt_init(cpu);
> + printf("AP=(%u) idt done\n", cpu);
> +
> + ap_int_init(cpu);
> + printf("AP=(%u) int done\n", cpu);
> +
> + ap_ldt_init(cpu);
> + printf("AP=(%u) ldt done\n", cpu);
> +
> + ap_ktss_init(cpu);
> + printf("AP=(%u) ktss done\n", cpu);
> +
> + pmap_remove_temporary_mapping();
> + printf("AP=(%u) remove tempmap done\n", cpu);
> +
> + pmap_set_page_dir();
> + flush_tlb();
> + printf("AP=(%u) reset page dir done\n", cpu);
> +
> + /* Initialize machine_slot fields with the cpu data */
> + machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386;
> + machine_slot[cpu].cpu_type = machine_slot[0].cpu_type;
> +
> + lapic_enable();
> + cpu_launch_first_thread(THREAD_NULL);
> +}
> +
> +void
> +cpu_ap_main()
> +{
> + unsigned apic_id = (((ApicLocalUnit*)phystokv(lapic_addr))->apic_id.r >>
> 24) & 0xff;
> + int cpu = apic_get_cpu_kernel_id(apic_id);
> +
> + do {
> + asm volatile ("pause" : : : "memory");
> + } while (bspdone != cpu);
> +
> + __sync_synchronize();
> +
> + cpu_setup(cpu);
> }
>
> kern_return_t
> cpu_start(int cpu)
> {
> - if (machine_slot[cpu].running)
> - return KERN_FAILURE;
> + assert(machine_slot[cpu].running != TRUE);
> +
> + uint16_t apic_id = apic_get_cpu_apic_id(cpu);
> +
> + printf("Trying to enable: %d\n", apic_id);
>
> - return intel_startCPU(cpu);
> + smp_startup_cpu(apic_id, AP_BOOT_ADDR);
> +
> + printf("Started cpu %d (lapic id %04x)\n", cpu, apic_id);
> +
> + return KERN_SUCCESS;
> }
>
> void
> start_other_cpus(void)
> {
> - int cpu;
> - for (cpu = 0; cpu < NCPUS; cpu++)
> - if (cpu != cpu_number())
> - cpu_start(cpu);
> -}
> + int ncpus = smp_get_numcpus();
> +
> + //Copy cpu initialization assembly routine
> + memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot,
> + (uint32_t)&apbootend - (uint32_t)&apboot);
>
> +#ifndef APIC
> + lapic_enable(); /* Enable lapic only once */
> +#endif
> + unsigned cpu;
> +
> + splhigh();
> +
> + bspdone = 0;
> + for (cpu = 1; cpu < ncpus; cpu++) {
> + machine_slot[cpu].running = FALSE;
> +
> + //Start cpu
> + printf("Starting AP %d\n", cpu);
> + cpu_start(cpu);
> +
> + bspdone++;
> + do {
> + asm volatile ("pause" : : : "memory");
> + } while (machine_slot[cpu].running == FALSE);
> +
> + __sync_synchronize();
> + }
> + printf("BSP: Completed SMP init\n");
> +}
> #endif /* NCPUS > 1 */
> diff --git a/i386/i386/mp_desc.h b/i386/i386/mp_desc.h
> index ede8775f..59d50e77 100644
> --- a/i386/i386/mp_desc.h
> +++ b/i386/i386/mp_desc.h
> @@ -46,6 +46,8 @@
> #include "gdt.h"
> #include "ldt.h"
>
> +#define AP_BOOT_ADDR 0x7000
> +
> /*
> * The descriptor tables are together in a structure
> * allocated one per processor (except for the boot processor).
> @@ -72,11 +74,12 @@ extern struct task_tss *mp_ktss[NCPUS];
> */
> extern struct real_descriptor *mp_gdt[NCPUS];
>
> +extern uint8_t solid_intstack[];
>
> /*
> * Each CPU calls this routine to set up its descriptor tables.
> */
> -extern struct mp_desc_table * mp_desc_init(int);
> +extern int mp_desc_init(int);
>
>
> extern void interrupt_processor(int cpu);
> @@ -90,4 +93,6 @@ extern kern_return_t cpu_start(int cpu);
>
> extern kern_return_t cpu_control(int cpu, const int *info, unsigned int
> count);
>
> +extern void interrupt_stack_alloc(void);
> +
> #endif /* _I386_MP_DESC_H_ */
> diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
> index 82d4b34a..d1d1fa51 100644
> --- a/i386/i386at/boothdr.S
> +++ b/i386/i386at/boothdr.S
> @@ -1,6 +1,6 @@
>
> #include <mach/machine/asm.h>
> -
> +#include <i386/apic.h>
> #include <i386/i386asm.h>
>
> /*
> @@ -54,7 +54,18 @@ boot_entry:
> movw %ax,%ss
>
> /* Switch to our own interrupt stack. */
> - movl $_intstack+INTSTACK_SIZE,%esp
> + movl $solid_intstack+INTSTACK_SIZE-4, %esp
> + andl $0xfffffff0,%esp
> +
> + /* Enable local apic */
> + xorl %eax, %eax
> + xorl %edx, %edx
> + movl $APIC_MSR, %ecx
> + rdmsr
> + orl $APIC_MSR_ENABLE, %eax
> + orl $APIC_MSR_BSP, %eax
> + movl $APIC_MSR, %ecx
> + wrmsr
>
> /* Reset EFLAGS to a known state. */
> pushl $0
> @@ -91,9 +102,6 @@ iplt_done:
> /* Jump into C code. */
> call EXT(c_boot_entry)
>
> - .comm _intstack,INTSTACK_SIZE
> - .comm _eintstack,0
> -
> .align 16
> .word 0
> boot_gdt_descr:
> diff --git a/i386/i386at/ioapic.c b/i386/i386at/ioapic.c
> index 003690ed..f7b0d1d3 100644
> --- a/i386/i386at/ioapic.c
> +++ b/i386/i386at/ioapic.c
> @@ -186,9 +186,8 @@ lapic_enable_timer(void)
> /* Some buggy hardware requires this set again */
> lapic->divider_config.r = LAPIC_TIMER_DIVIDE_16;
>
> - /* Enable interrupts for the first time on BSP */
> - asm("sti");
> - printf("LAPIC timer configured\n");
> + /* Enable interrupts for the first time */
> + printf("LAPIC timer configured on cpu%d\n", cpu_number());
> }
>
> void
> diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
> index 98408c06..e0f669b6 100644
> --- a/i386/i386at/model_dep.c
> +++ b/i386/i386at/model_dep.c
> @@ -134,11 +134,9 @@ extern char version[];
> /* If set, reboot the system on ctrl-alt-delete. */
> boolean_t rebootflag = FALSE; /* exported to kdintr */
>
> -/* Interrupt stack. */
> -static char int_stack[INTSTACK_SIZE] __aligned(INTSTACK_SIZE);
> -#if NCPUS <= 1
> -vm_offset_t int_stack_top[1], int_stack_base[1];
> -#endif
> +/* Interrupt stacks */
> +extern vm_offset_t int_stack_top[], int_stack_base[];
> +extern uint8_t solid_intstack[]; /* bottom */
>
> #ifdef LINUX_DEV
> extern void linux_init(void);
> @@ -171,15 +169,16 @@ void machine_init(void)
> hyp_init();
> #else /* MACH_HYP */
>
> -#if defined(APIC)
> - if (acpi_apic_init() != ACPI_SUCCESS) {
> - panic("APIC not found, unable to boot");
> - }
> - ioapic_configure();
> - lapic_enable_timer();
> #if (NCPUS > 1)
> + acpi_apic_init();
> smp_init();
> +#endif
> +#if defined(APIC)
> + ioapic_configure();
> +#endif
> + startrtclock();
>
> +#if defined(APIC)
> #warning FIXME: Rather unmask them from their respective drivers
> /* kd */
> unmask_irq(1);
> @@ -187,8 +186,7 @@ void machine_init(void)
> unmask_irq(4);
> /* com1 */
> unmask_irq(3);
> -#endif /* NCPUS > 1 */
> -#endif /* APIC */
> +#endif
>
> #ifdef LINUX_DEV
> /*
> @@ -483,8 +481,7 @@ i386at_init(void)
> hyp_p2m_init();
> #endif /* MACH_XEN */
>
> - int_stack_base[0] = (vm_offset_t)&int_stack;
> - int_stack_top[0] = int_stack_base[0] + INTSTACK_SIZE - 4;
> + interrupt_stack_alloc();
> }
>
> /*
> @@ -576,7 +573,6 @@ void c_boot_entry(vm_offset_t bi)
> #endif /* MACH_KDB */
>
> machine_slot[0].is_cpu = TRUE;
> - machine_slot[0].running = TRUE;
> machine_slot[0].cpu_subtype = CPU_SUBTYPE_AT386;
>
> switch (cpu_type)
> @@ -622,8 +618,11 @@ timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
> void
> startrtclock(void)
> {
> -#ifndef APIC
> +#ifdef APIC
> + lapic_enable_timer();
> +#else
> clkstart();
> + unmask_irq(0);
> #endif
> }
>
> --
> 2.34.1
>
>
>
--
Samuel
---
Pour une évaluation indépendante, transparente et rigoureuse !
Je soutiens la Commission d'Évaluation de l'Inria.