Re: [PATCH 1/3] pmap: dynamically allocate the whole user page tree map
From: Samuel Thibault
Subject: Re: [PATCH 1/3] pmap: dynamically allocate the whole user page tree map
Date: Sun, 21 May 2023 21:17:54 +0200
User-agent: NeoMutt/20170609 (1.8.3)
Applied, thanks!!
Luca Dariz, on Sun, 21 May 2023 10:57:56 +0200, wrote:
> * i386/intel/pmap.c: switch to dynamic allocation of all the page tree
> map levels for the user-space address range, using a separate kmem
> cache for each level. This allows extending the usable memory space
> on x86_64 to use more than one L3 page for user space. The kernel
> address map is left untouched for now as it needs a different
> initialization.
> * i386/intel/pmap.h: remove hardcoded user pages and add a macro to
> reconstruct the page-to-virtual mapping
> ---
> i386/intel/pmap.c | 544 ++++++++++++++++++++++------------------------
> i386/intel/pmap.h | 21 +-
> 2 files changed, 277 insertions(+), 288 deletions(-)
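
For readers following along: the key idea is that every level of the user
half of the page-table tree now comes from its own kmem cache and is
allocated on demand, instead of user PDPs and page directories being
preallocated in pmap_create(). The per-level step looks roughly like this
(a hypothetical helper for illustration only -- no locking and no
MACH_PV_PAGETABLES handling; the real logic is pmap_expand_level() further
down in the patch):

  static pt_entry_t *
  walk_or_alloc(pt_entry_t *table, int index, struct kmem_cache *cache)
  {
      /* Descend one level, allocating the next table from this
         level's dedicated cache if the entry is still empty. */
      if (!(table[index] & INTEL_PTE_VALID)) {
          vm_offset_t nt = kmem_cache_alloc(cache);
          memset((void *) nt, 0, INTEL_PGBYTES);
          WRITE_PTE(&table[index], pa_to_pte(kvtophys(nt))
                    | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
      }
      return (pt_entry_t *) ptetokv(table[index]);
  }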
>
> diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
> index e867ed59..3a30271e 100644
> --- a/i386/intel/pmap.c
> +++ b/i386/intel/pmap.c
> @@ -398,6 +398,7 @@ struct pmap kernel_pmap_store;
> pmap_t kernel_pmap;
>
> struct kmem_cache pmap_cache; /* cache of pmap structures */
> +struct kmem_cache pt_cache; /* cache of page tables */
> struct kmem_cache pd_cache; /* cache of page directories */
> #if PAE
> struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */
> @@ -429,6 +430,14 @@ pt_entry_t *kernel_page_dir;
> */
> static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS];
>
> +#ifdef __x86_64__
> +static inline pt_entry_t *
> +pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr)
> +{
> + return &pmap->l4base[lin2l4num(lin_addr)];
> +}
> +#endif
> +
> #ifdef PAE
> static inline pt_entry_t *
> pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
> @@ -443,7 +452,7 @@ pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
> #else /* __x86_64__ */
> pdp_table = pmap->pdpbase;
> #endif /* __x86_64__ */
> - return pdp_table;
> + return &pdp_table[lin2pdpnum(lin_addr)];
> }
> #endif
>
> @@ -456,7 +465,9 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr)
> #if PAE
> pt_entry_t *pdp_table;
> pdp_table = pmap_ptp(pmap, addr);
> - pt_entry_t pde = pdp_table[lin2pdpnum(addr)];
> + if (pdp_table == 0)
> + return(PT_ENTRY_NULL);
> + pt_entry_t pde = *pdp_table;
> if ((pde & INTEL_PTE_VALID) == 0)
> return PT_ENTRY_NULL;
> page_dir = (pt_entry_t *) ptetokv(pde);
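
Note the interface change here: pmap_ptp() now returns a pointer to the L3
entry for the address rather than the table base (and, judging from the
null check above, 0 when the path is missing), so pmap_pde() must test the
returned pointer before dereferencing. A caller now reads like this sketch:

  pt_entry_t *pdpe = pmap_ptp(pmap, addr); /* &pdp_table[lin2pdpnum(addr)] */
  if (pdpe == 0 || !(*pdpe & INTEL_PTE_VALID))
      return PT_ENTRY_NULL;  /* intermediate level not present */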
> @@ -1092,15 +1103,18 @@ void pmap_init(void)
> */
> s = (vm_size_t) sizeof(struct pmap);
> kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0);
> - kmem_cache_init(&pd_cache, "pd",
> + kmem_cache_init(&pt_cache, "pmap_L1",
> + INTEL_PGBYTES, INTEL_PGBYTES, NULL,
> + KMEM_CACHE_PHYSMEM);
> + kmem_cache_init(&pd_cache, "pmap_L2",
> INTEL_PGBYTES, INTEL_PGBYTES, NULL,
> KMEM_CACHE_PHYSMEM);
> #if PAE
> - kmem_cache_init(&pdpt_cache, "pdpt",
> + kmem_cache_init(&pdpt_cache, "pmap_L3",
> INTEL_PGBYTES, INTEL_PGBYTES, NULL,
> KMEM_CACHE_PHYSMEM);
> #ifdef __x86_64__
> - kmem_cache_init(&l4_cache, "L4",
> + kmem_cache_init(&l4_cache, "pmap_L4",
> INTEL_PGBYTES, INTEL_PGBYTES, NULL,
> KMEM_CACHE_PHYSMEM);
> #endif /* __x86_64__ */
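
The renames make the cache-to-level correspondence explicit; for reference:

  pmap_L1  pt_cache    page tables (L1)
  pmap_L2  pd_cache    page directories (L2)
  pmap_L3  pdpt_cache  page directory pointer tables (L3, PAE)
  pmap_L4  l4_cache    PML4 root (L4, x86_64)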
> @@ -1244,6 +1258,11 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
> vm_object_lock(pmap_object);
> m = vm_page_lookup(pmap_object, pa);
> vm_page_lock_queues();
> +#ifdef MACH_PV_PAGETABLES
> + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
> + panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t)
> kv_to_ma(pa));
> + pmap_set_page_readwrite((void*) phystokv(pa));
> +#endif /* MACH_PV_PAGETABLES */
> vm_page_free(m);
> inuse_ptepages_count--;
> vm_page_unlock_queues();
> @@ -1265,7 +1284,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
> pmap_t pmap_create(vm_size_t size)
> {
> #ifdef __x86_64__
> - // needs to be reworked if we want to dynamically allocate PDPs
> + // needs to be reworked if we want to dynamically allocate PDPs for kernel
> const int PDPNUM = PDPNUM_KERNEL;
> #endif
> pt_entry_t *page_dir[PDPNUM];
> @@ -1360,30 +1379,6 @@ pmap_t pmap_create(vm_size_t size)
> memset(p->l4base, 0, INTEL_PGBYTES);
> WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
> pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
> -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
> - // kernel vm and user vm are not in the same l4 entry, so add the user one
> - // TODO alloc only PDPTE for the user range VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS
> - // and keep the same for kernel range, in l4 table we have different entries
> - pt_entry_t *pdp_user = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
> - if (pdp_user == NULL) {
> - panic("pmap create");
> - }
> - memset(pdp_user, 0, INTEL_PGBYTES);
> - WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)],
> - pa_to_pte(kvtophys((vm_offset_t) pdp_user)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
> -#endif /* lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) */
> - for (int i = 0; i < PDPNUM_USER; i++) {
> - pt_entry_t *user_page_dir = (pt_entry_t *) kmem_cache_alloc(&pd_cache);
> - memset(user_page_dir, 0, INTEL_PGBYTES);
> - WRITE_PTE(&pdp_user[i + lin2pdpnum(VM_MIN_USER_ADDRESS)], // pdp_user
> - pa_to_pte(kvtophys((vm_offset_t)user_page_dir))
> - | INTEL_PTE_VALID
> -#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
> - | INTEL_PTE_WRITE | INTEL_PTE_USER
> -#endif
> - );
> - }
> -
> #ifdef MACH_PV_PAGETABLES
> // FIXME: use kmem_cache_alloc instead
> if (kmem_alloc_wired(kernel_map,
> @@ -1443,15 +1438,7 @@ pmap_t pmap_create(vm_size_t size)
>
> void pmap_destroy(pmap_t p)
> {
> -#if PAE
> - int i;
> -#endif
> - boolean_t free_all;
> - pt_entry_t *page_dir;
> - pt_entry_t *pdep;
> - phys_addr_t pa;
> int c, s;
> - vm_page_t m;
>
> if (p == PMAP_NULL)
> return;
> @@ -1466,87 +1453,54 @@ void pmap_destroy(pmap_t p)
> return; /* still in use */
> }
>
> + /*
> + * Free the page table tree.
> + */
> #if PAE
> - for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
> #ifdef __x86_64__
> -#ifdef USER32
> - /* In this case we know we have one PDP for user space */
> - pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#else
> -#warning "TODO do 64-bit userspace need more that 512G?"
> - pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#endif /* USER32 */
> - page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> + for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
> + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
> + if (!(pdp & INTEL_PTE_VALID))
> + continue;
> + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
> + for (int l3i = 0; l3i < 512; l3i++) {
> #else /* __x86_64__ */
> - page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> + pt_entry_t *pdpbase = p->pdpbase;
> + for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
> #endif /* __x86_64__ */
> - free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> + pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
> + if (!(pde & INTEL_PTE_VALID))
> + continue;
> + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
> + for (int l2i = 0; l2i < 512; l2i++) {
> #else /* PAE */
> - free_all = FALSE;
> - page_dir = p->dirbase;
> + pt_entry_t *pdebase = p->dirbase;
> + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
> #endif /* PAE */
> -
> -#ifdef __x86_64__
> -#warning FIXME 64bit need to free l3
> -#endif
> - /*
> - * Free the memory maps, then the
> - * pmap structure.
> - */
> - for (pdep = page_dir;
> - (free_all
> - || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
> - && pdep < &page_dir[NPTES];
> - pdep += ptes_per_vm_page) {
> - if (*pdep & INTEL_PTE_VALID) {
> - pa = pte_to_pa(*pdep);
> - assert(pa == (vm_offset_t) pa);
> - vm_object_lock(pmap_object);
> - m = vm_page_lookup(pmap_object, pa);
> - if (m == VM_PAGE_NULL)
> - panic("pmap_destroy: pte page not in object");
> - vm_page_lock_queues();
> -#ifdef MACH_PV_PAGETABLES
> - if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
> - panic("pmap_destroy: couldn't unpin page %llx(%lx)\n",
> pa, (vm_offset_t) kv_to_ma(pa));
> - pmap_set_page_readwrite((void*) phystokv(pa));
> -#endif /* MACH_PV_PAGETABLES */
> - vm_page_free(m);
> - inuse_ptepages_count--;
> - vm_page_unlock_queues();
> - vm_object_unlock(pmap_object);
> - }
> - }
> -#ifdef MACH_PV_PAGETABLES
> - pmap_set_page_readwrite((void*) page_dir);
> -#endif /* MACH_PV_PAGETABLES */
> - kmem_cache_free(&pd_cache, (vm_offset_t) page_dir);
> + pt_entry_t pte = (pt_entry_t) pdebase[l2i];
> + if (!(pte & INTEL_PTE_VALID))
> + continue;
> + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
> + }
> #if PAE
> - }
> -
> -#ifdef MACH_PV_PAGETABLES
> + kmem_cache_free(&pd_cache, (vm_offset_t)pdebase);
> + }
> #ifdef __x86_64__
> - pmap_set_page_readwrite(p->l4base);
> - pmap_set_page_readwrite(p->user_l4base);
> - pmap_set_page_readwrite(p->user_pdpbase);
> + kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase);
> + }
> #endif /* __x86_64__ */
> - pmap_set_page_readwrite(p->pdpbase);
> -#endif /* MACH_PV_PAGETABLES */
> +#endif /* PAE */
>
> + /* Finally, free the page table tree root and the pmap itself */
> +#if PAE
> #ifdef __x86_64__
> - kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_USER_ADDRESS));
> -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
> - // TODO kernel vm and user vm are not in the same l4 entry
> -#endif
> kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
> -#ifdef MACH_PV_PAGETABLES
> - kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
> - kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
> -#endif /* MACH_PV_PAGETABLES */
> #else /* __x86_64__ */
> - kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
> + kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
> #endif /* __x86_64__ */
> -#endif /* PAE */
> +#else /* PAE */
> + kmem_cache_free(&pd_cache, (vm_offset_t) p->dirbase);
> +#endif /* PAE */
> kmem_cache_free(&pmap_cache, (vm_offset_t) p);
> }
>
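If I read the new loop right, teardown is now strictly leaf-to-root, and
only over the user range; schematically (x86_64 PAE case):

  for each valid L4 entry below VM_MAX_USER_ADDRESS:
      for each valid L3 entry:
          for each valid L2 entry:
              kmem_cache_free(&pt_cache, L1 page)
          kmem_cache_free(&pd_cache, L2 page)
      kmem_cache_free(&pdpt_cache, L3 page)
  kmem_cache_free(&l4_cache, root), then the pmap itself
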
> @@ -1756,7 +1710,7 @@ void pmap_remove(
> l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
> if (l > e)
> l = e;
> - if (*pde & INTEL_PTE_VALID) {
> + if (pde && (*pde & INTEL_PTE_VALID)) {
> spte = (pt_entry_t *)ptetokv(*pde);
> spte = &spte[ptenum(s)];
> epte = &spte[intel_btop(l-s)];
> @@ -2036,86 +1990,24 @@ void pmap_protect(
> SPLX(spl);
> }
>
> +typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr);
> /*
> - * Insert the given physical page (p) at
> - * the specified virtual address (v) in the
> - * target physical map with the protection requested.
> - *
> - * If specified, the page will be wired down, meaning
> - * that the related pte can not be reclaimed.
> - *
> - * NB: This is the only routine which MAY NOT lazy-evaluate
> - * or lose information. That is, this routine must actually
> - * insert this page into the given map NOW.
> - */
> -void pmap_enter(
> - pmap_t pmap,
> - vm_offset_t v,
> - phys_addr_t pa,
> - vm_prot_t prot,
> - boolean_t wired)
> +* Expand one single level of the page table tree
> +*/
> +static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl,
> + pmap_level_getter_t pmap_level,
> + pmap_level_getter_t pmap_level_upper,
> + int n_per_vm_page,
> + struct kmem_cache *cache)
> {
> - boolean_t is_physmem;
> pt_entry_t *pte;
> - pv_entry_t pv_h;
> - unsigned long i, pai;
> - pv_entry_t pv_e;
> - pt_entry_t template;
> - int spl;
> - phys_addr_t old_pa;
> -
> - assert(pa != vm_page_fictitious_addr);
> - if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
> - if (pmap == PMAP_NULL)
> - return;
> -
> -#if !MACH_KDB
> - if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
> - panic("pmap_enter(%zx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
> -#endif
> -#if !(__i486__ || __i586__ || __i686__)
> - if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
> - && !wired /* hack for io_wire */ ) {
> - /*
> - * Because the 386 ignores write protection in kernel mode,
> - * we cannot enter a read-only kernel mapping, and must
> - * remove an existing mapping if changing it.
> - */
> - PMAP_READ_LOCK(pmap, spl);
> -
> - pte = pmap_pte(pmap, v);
> - if (pte != PT_ENTRY_NULL && *pte != 0) {
> - /*
> - * Invalidate the translation buffer,
> - * then remove the mapping.
> - */
> - pmap_remove_range(pmap, v, pte,
> - pte + ptes_per_vm_page);
> - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
> - }
> - PMAP_READ_UNLOCK(pmap, spl);
> - return;
> - }
> -#endif
> -
> - /*
> - * Must allocate a new pvlist entry while we're unlocked;
> - * Allocating may cause pageout (which will lock the pmap system).
> - * If we determine we need a pvlist entry, we will unlock
> - * and allocate one. Then we will retry, throughing away
> - * the allocated entry later (if we no longer need it).
> - */
> - pv_e = PV_ENTRY_NULL;
> -Retry:
> - PMAP_READ_LOCK(pmap, spl);
>
> /*
> * Expand pmap to include this pte. Assume that
> * pmap is always expanded to include enough hardware
> * pages to map one VM page.
> */
> -
> - while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
> + while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) {
> /*
> * Need to allocate a new page-table page.
> */
> @@ -2136,7 +2028,9 @@ Retry:
> */
> PMAP_READ_UNLOCK(pmap, spl);
>
> - ptp = phystokv(pmap_page_table_page_alloc());
> + while (!(ptp = kmem_cache_alloc(cache)))
> + VM_PAGE_WAIT((void (*)()) 0);
> + memset((void *)ptp, 0, PAGE_SIZE);
>
> /*
> * Re-lock the pmap and check that another thread has
> @@ -2146,12 +2040,12 @@ Retry:
> */
> PMAP_READ_LOCK(pmap, spl);
>
> - if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
> + if (pmap_level(pmap, v) != PT_ENTRY_NULL) {
> /*
> * Oops...
> */
> PMAP_READ_UNLOCK(pmap, spl);
> - pmap_page_table_page_dealloc(kvtophys(ptp));
> + kmem_cache_free(cache, ptp);
> PMAP_READ_LOCK(pmap, spl);
> continue;
> }
> @@ -2159,8 +2053,8 @@ Retry:
> /*
> * Enter the new page table page in the page directory.
> */
> - i = ptes_per_vm_page;
> - pdp = pmap_pde(pmap, v);
> + i = n_per_vm_page;
> + pdp = pmap_level_upper(pmap, v);
> do {
> #ifdef MACH_PV_PAGETABLES
> pmap_set_page_readonly((void *) ptp);
> @@ -2185,6 +2079,100 @@ Retry:
> */
> continue;
> }
> + return pte;
> +}
> +
> +/*
> + * Expand, if required, the PMAP to include the virtual address V.
> + * PMAP needs to be locked, and it will be still locked on return. It
> + * can temporarily unlock the PMAP, during allocation or deallocation
> + * of physical pages.
> + */
> +static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl)
> +{
> +#ifdef PAE
> +#ifdef __x86_64__
> + pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, ptes_per_vm_page, &pdpt_cache);
> +#endif /* __x86_64__ */
> + pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, ptes_per_vm_page, &pd_cache);
> +#endif /* PAE */
> + return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache);
> +}
> +
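So pmap_expand() just chains one pmap_expand_level() call per level,
top-down; when it returns, the whole path for v exists and pmap_pte(pmap, v)
can no longer fail. Schematically (table names invented for clarity, the
index macros are the patch's own):

  l4base[lin2l4num(v)]    -> L3 table, from pdpt_cache
  l3table[lin2pdpnum(v)]  -> L2 table, from pd_cache
  l2table[lin2pdenum(v)]  -> L1 table, from pt_cache
  return &l1table[ptenum(v)];   /* == pmap_pte(pmap, v) */
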
> +/*
> + * Insert the given physical page (p) at
> + * the specified virtual address (v) in the
> + * target physical map with the protection requested.
> + *
> + * If specified, the page will be wired down, meaning
> + * that the related pte can not be reclaimed.
> + *
> + * NB: This is the only routine which MAY NOT lazy-evaluate
> + * or lose information. That is, this routine must actually
> + * insert this page into the given map NOW.
> + */
> +void pmap_enter(
> + pmap_t pmap,
> + vm_offset_t v,
> + phys_addr_t pa,
> + vm_prot_t prot,
> + boolean_t wired)
> +{
> + boolean_t is_physmem;
> + pt_entry_t *pte;
> + pv_entry_t pv_h;
> + unsigned long i, pai;
> + pv_entry_t pv_e;
> + pt_entry_t template;
> + int spl;
> + phys_addr_t old_pa;
> +
> + assert(pa != vm_page_fictitious_addr);
> + if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
> + if (pmap == PMAP_NULL)
> + return;
> +
> +#if !MACH_KDB
> + if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
> + panic("pmap_enter(%llx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
> +#endif
> +#if !(__i486__ || __i586__ || __i686__)
> + if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
> + && !wired /* hack for io_wire */ ) {
> + /*
> + * Because the 386 ignores write protection in kernel mode,
> + * we cannot enter a read-only kernel mapping, and must
> + * remove an existing mapping if changing it.
> + */
> + PMAP_READ_LOCK(pmap, spl);
> +
> + pte = pmap_pte(pmap, v);
> + if (pte != PT_ENTRY_NULL && *pte != 0) {
> + /*
> + * Invalidate the translation buffer,
> + * then remove the mapping.
> + */
> + pmap_remove_range(pmap, v, pte,
> + pte + ptes_per_vm_page);
> + PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
> + }
> + PMAP_READ_UNLOCK(pmap, spl);
> + return;
> + }
> +#endif
> +
> + /*
> + * Must allocate a new pvlist entry while we're unlocked;
> + * Allocating may cause pageout (which will lock the pmap system).
> + * If we determine we need a pvlist entry, we will unlock
> + * and allocate one. Then we will retry, throughing away
> + * the allocated entry later (if we no longer need it).
> + */
> + pv_e = PV_ENTRY_NULL;
> +Retry:
> + PMAP_READ_LOCK(pmap, spl);
> +
> + pte = pmap_expand(pmap, v, spl);
>
> if (vm_page_ready())
> is_physmem = (vm_page_lookup_pa(pa) != NULL);
> @@ -2462,10 +2450,7 @@ void pmap_copy(
> */
> void pmap_collect(pmap_t p)
> {
> - int i;
> - boolean_t free_all;
> - pt_entry_t *page_dir;
> - pt_entry_t *pdp, *ptp;
> + pt_entry_t *ptp;
> pt_entry_t *eptp;
> phys_addr_t pa;
> int spl, wired;
> @@ -2476,119 +2461,104 @@ void pmap_collect(pmap_t p)
> if (p == kernel_pmap)
> return;
>
> + /*
> + * Free the page table tree.
> + */
> #if PAE
> - for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
> #ifdef __x86_64__
> -#ifdef USER32
> - /* In this case we know we have one PDP for user space */
> - pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#else
> -#warning "TODO do 64-bit userspace need more that 512G?"
> - pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
> -#endif /* USER32 */
> - page_dir = (pt_entry_t *) ptetokv(pdp[i]);
> + for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
> + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
> + if (!(pdp & INTEL_PTE_VALID))
> + continue;
> + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
> + for (int l3i = 0; l3i < 512; l3i++) {
> #else /* __x86_64__ */
> - page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
> + pt_entry_t *pdpbase = p->pdpbase;
> + for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
> #endif /* __x86_64__ */
> - free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
> -#else
> - i = 0;
> - free_all = FALSE;
> - page_dir = p->dirbase;
> -#endif
> -
> - /*
> - * Garbage collect map.
> - */
> - PMAP_READ_LOCK(p, spl);
> - for (pdp = page_dir;
> - (free_all
> - || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
> - && pdp < &page_dir[NPTES];
> - pdp += ptes_per_vm_page) {
> - if (*pdp & INTEL_PTE_VALID) {
> -
> - pa = pte_to_pa(*pdp);
> - ptp = (pt_entry_t *)phystokv(pa);
> - eptp = ptp + NPTES*ptes_per_vm_page;
> -
> - /*
> - * If the pte page has any wired mappings, we cannot
> - * free it.
> - */
> - wired = 0;
> - {
> - pt_entry_t *ptep;
> - for (ptep = ptp; ptep < eptp; ptep++) {
> - if (*ptep & INTEL_PTE_WIRED) {
> - wired = 1;
> - break;
> - }
> - }
> - }
> - if (!wired) {
> - /*
> - * Remove the virtual addresses mapped by this pte page.
> - */
> - { /*XXX big hack*/
> - vm_offset_t va = pdenum2lin(pdp - page_dir
> - + i * NPTES);
> - if (p == kernel_pmap)
> - va = lintokv(va);
> - pmap_remove_range(p,
> - va,
> - ptp,
> - eptp);
> - }
> -
> - /*
> - * Invalidate the page directory pointer.
> - */
> - {
> - int i = ptes_per_vm_page;
> - pt_entry_t *pdep = pdp;
> - do {
> + pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
> + if (!(pde & INTEL_PTE_VALID))
> + continue;
> + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
> + for (int l2i = 0; l2i < 512; l2i++) {
> +#else /* PAE */
> + pt_entry_t *pdebase = p->dirbase;
> + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
> +#endif /* PAE */
> + pt_entry_t pte = (pt_entry_t) pdebase[l2i];
> + if (!(pte & INTEL_PTE_VALID))
> + continue;
> +
> + pa = pte_to_pa(pte);
> + ptp = (pt_entry_t *)phystokv(pa);
> + eptp = ptp + NPTES*ptes_per_vm_page;
> +
> + /*
> + * If the pte page has any wired mappings, we cannot
> + * free it.
> + */
> + wired = 0;
> + {
> + pt_entry_t *ptep;
> + for (ptep = ptp; ptep < eptp; ptep++) {
> + if (*ptep & INTEL_PTE_WIRED) {
> + wired = 1;
> + break;
> + }
> + }
> + }
> + if (!wired) {
> + /*
> + * Remove the virtual addresses mapped by this pte page.
> + */
> + { /*XXX big hack*/
> + vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0);
> + if (p == kernel_pmap)
> + va = lintokv(va);
> + pmap_remove_range(p, va, ptp, eptp);
> + }
> +
> + /*
> + * Invalidate the page directory pointer.
> + */
> + {
> + int i = ptes_per_vm_page;
> + pt_entry_t *pdep = &pdebase[l2i];
> + do {
> #ifdef MACH_PV_PAGETABLES
> - unsigned long pte = *pdep;
> - void *ptable = (void*) ptetokv(pte);
> - if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
> - panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
> - if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
> - panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
> - pmap_set_page_readwrite(ptable);
> + unsigned long pte = *pdep;
> + void *ptable = (void*) ptetokv(pte);
> + if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
> + panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
> + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
> + panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
> + pmap_set_page_readwrite(ptable);
> #else /* MACH_PV_PAGETABLES */
> - *pdep++ = 0;
> + *pdep++ = 0;
> #endif /* MACH_PV_PAGETABLES */
> - } while (--i > 0);
> - }
> + } while (--i > 0);
> + }
>
> - PMAP_READ_UNLOCK(p, spl);
> + PMAP_READ_UNLOCK(p, spl);
>
> - /*
> - * And free the pte page itself.
> - */
> - {
> - vm_page_t m;
> -
> - vm_object_lock(pmap_object);
> - assert(pa == (vm_offset_t) pa);
> - m = vm_page_lookup(pmap_object, pa);
> - if (m == VM_PAGE_NULL)
> - panic("pmap_collect: pte page not in object");
> - vm_page_lock_queues();
> - vm_page_free(m);
> - inuse_ptepages_count--;
> - vm_page_unlock_queues();
> - vm_object_unlock(pmap_object);
> - }
> + /*
> + * And free the pte page itself.
> + */
> + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
>
> - PMAP_READ_LOCK(p, spl);
> - }
> - }
> - }
> + PMAP_READ_LOCK(p, spl);
> +
> + }
> + }
> #if PAE
> + // TODO check l2?
> + }
> +#ifdef __x86_64__
> + // TODO check l3?
> }
> -#endif
> +#endif /* __x86_64__ */
> +#endif /* PAE */
> +
> PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
>
> PMAP_READ_UNLOCK(p, spl);
> diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
> index 4c1b9bd5..5fc7fb25 100644
> --- a/i386/intel/pmap.h
> +++ b/i386/intel/pmap.h
> @@ -75,7 +75,6 @@ typedef phys_addr_t pt_entry_t;
> #define L4SHIFT 39 /* L4 shift */
> #define L4MASK 0x1ff /* mask for L4 index */
> #define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
> -#define PDPNUM_USER (((VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) >> PDPSHIFT) + 1)
> #define PDPMASK 0x1ff /* mask for page directory pointer index */
> #else /* __x86_64__ */
> #define PDPNUM 4 /* number of page directory pointers */
> @@ -130,6 +129,26 @@ typedef phys_addr_t pt_entry_t;
> */
> #define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
>
> +#if PAE
> +#ifdef __x86_64__
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> + (((vm_offset_t)(l4num) << L4SHIFT) + \
> + ((vm_offset_t)(l3num) << PDPSHIFT) + \
> + ((vm_offset_t)(l2num) << PDESHIFT) + \
> + ((vm_offset_t)(l1num) << PTESHIFT))
> +#else /* __x86_64__ */
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> + (((vm_offset_t)(l3num) << PDPSHIFT) + \
> + ((vm_offset_t)(l2num) << PDESHIFT) + \
> + ((vm_offset_t)(l1num) << PTESHIFT))
> +#endif
> +#else /* PAE */
> +#define pagenum2lin(l4num, l3num, l2num, l1num) \
> + (((vm_offset_t)(l2num) << PDESHIFT) + \
> + ((vm_offset_t)(l1num) << PTESHIFT))
> +#endif
> +
> +
> /*
> * Convert linear offset to page table index
> */
> --
> 2.30.2
>
>
--
Samuel
---
For an independent, transparent and rigorous evaluation!
I support the Inria Commission d'Évaluation.