From: Samuel Thibault
Subject: [gnumach] 01/02: New upstream version 1.7+git20160921
Date: Wed, 21 Sep 2016 00:01:23 +0000

This is an automated email from the git hooks/post-receive script.

sthibault pushed a commit to branch master
in repository gnumach.

commit bc2d3a9079a479120fd19433ea373ec44f83d10f
Author: Samuel Thibault <address@hidden>
Date:   Tue Sep 20 22:58:24 2016 +0000

    New upstream version 1.7+git20160921
---
 ChangeLog                    |  520 +++++++++++++++
 Makefile.am                  |    4 +-
 Makefile.in                  |   30 +-
 configure                    |   20 +-
 device/ds_routines.c         |    4 +-
 doc/mach.info                |  242 +++----
 doc/mach.info-1              |   26 +-
 doc/mach.info-2              |    6 +-
 doc/mach.texi                |    2 +-
 doc/stamp-vti                |    8 +-
 doc/version.texi             |    8 +-
 i386/Makefrag.am             |    1 +
 i386/i386/db_trace.c         |   19 +-
 i386/i386/locore.S           |    2 +
 i386/i386/model_dep.h        |    7 -
 i386/i386/phys.c             |   22 +-
 i386/i386/strings.c          |  150 +++++
 i386/i386/trap.c             |   10 -
 i386/i386at/biosmem.c        |  586 +++++++++-------
 i386/i386at/biosmem.h        |   47 +-
 i386/i386at/boothdr.S        |    4 +-
 i386/i386at/interrupt.S      |    1 +
 i386/i386at/mem.c            |   22 +-
 i386/i386at/model_dep.c      |   93 ++-
 i386/include/mach/i386/asm.h |   14 +-
 i386/intel/pmap.c            |  150 ++---
 i386/intel/pmap.h            |   12 +-
 i386/xen/xen.c               |    2 +-
 ipc/ipc_init.c               |    2 +-
 ipc/mach_debug.c             |    3 +
 kern/lock.c                  |    3 +
 kern/slab.c                  |    4 +-
 kern/startup.c               |    1 -
 kern/strings.c               |  103 +++
 kern/task.c                  |    2 +-
 kern/thread.c                |    6 +-
 kern/thread.h                |    5 +-
 linux/dev/glue/block.c       |   35 +-
 linux/dev/glue/net.c         |   63 +-
 linux/dev/init/main.c        |    2 +-
 version.m4                   |    2 +-
 vm/pmap.h                    |   32 +-
 vm/vm_fault.c                |    8 +-
 vm/vm_kern.c                 |    8 +-
 vm/vm_kern.h                 |    2 +-
 vm/vm_map.c                  |  124 ++--
 vm/vm_map.h                  |   16 +-
 vm/vm_page.c                 | 1507 +++++++++++++++++++++++++++++++++++++++---
 vm/vm_page.h                 |  184 +++---
 vm/vm_pageout.c              |  690 +++----------------
 vm/vm_pageout.h              |    4 +-
 vm/vm_resident.c             |  385 +++--------
 xen/block.c                  |   51 +-
 xen/console.c                |    4 +-
 xen/net.c                    |   49 +-
 55 files changed, 3388 insertions(+), 1919 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dce289c..74da3e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,523 @@
+2016-09-21  Richard Braun  <address@hidden>
+
+       Enable high memory
+       * i386/i386at/biosmem.c (biosmem_setup): Load the HIGHMEM segment if
+       present.
+       (biosmem_free_usable): Report high memory as usable.
+       * vm/vm_page.c (vm_page_boot_table_size, vm_page_table_size,
+       vm_page_mem_size, vm_page_mem_free): Scan all segments.
+       * vm/vm_resident.c (vm_page_grab): Describe allocation strategy
+       with regard to the HIGHMEM segment.
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Update device drivers for highmem support
+       Unconditionally use bounce buffers for now.
+
+       * linux/dev/glue/net.c (device_write): Unconditionally use a
+       bounce buffer.
+       * xen/block.c (device_write): Likewise.
+       * xen/net.c: Include <device/ds_routines.h>.
+       (device_write): Unconditionally use a bounce buffer.
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Update Linux block layer glue code
+       The Linux block layer glue code needs to use page nodes with the
+       appropriate interface since their redefinition as struct list.
+
+       * linux/dev/glue/block.c: Include <kern/list.h>.
+       (struct temp_data): Define member `pages' as a struct list.
+       (alloc_buffer): Update to use list_xxx functions.
+       (free_buffer, INIT_DATA, device_open, device_read): Likewise.
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Rework pageout to handle multiple segments
+       As we're about to use a new HIGHMEM segment, potentially much larger
+       than the existing DMA and DIRECTMAP ones, it's now compulsory to make
+       the pageout daemon aware of those segments.
+
+       And while we're at it, let's fix some of the defects that have been
+       plaguing pageout forever, such as throttling, and pageout of internal
+       versus external pages (this commit notably introduces a hardcoded
+       policy in which as many external pages are selected before considering
+       internal pages).
+
+       * kern/slab.c (kmem_pagefree_physmem): Update call to vm_page_release.
+       * vm/vm_page.c: Include <kern/counters.h> and <vm/vm_pageout.h>.
+       (VM_PAGE_SEG_THRESHOLD_MIN_NUM, VM_PAGE_SEG_THRESHOLD_MIN_DENOM,
+       VM_PAGE_SEG_THRESHOLD_MIN, VM_PAGE_SEG_THRESHOLD_LOW_NUM,
+       VM_PAGE_SEG_THRESHOLD_LOW_DENOM, VM_PAGE_SEG_THRESHOLD_LOW,
+       VM_PAGE_SEG_THRESHOLD_HIGH_NUM, VM_PAGE_SEG_THRESHOLD_HIGH_DENOM,
+       VM_PAGE_SEG_THRESHOLD_HIGH, VM_PAGE_SEG_MIN_PAGES,
+       VM_PAGE_HIGH_ACTIVE_PAGE_NUM, VM_PAGE_HIGH_ACTIVE_PAGE_DENOM): New macros.
+       (struct vm_page_queue): New type.
+       (struct vm_page_seg): Add new members `min_free_pages', `low_free_pages',
+       `high_free_pages', `active_pages', `nr_active_pages', `high_active_pages',
+       `inactive_pages', `nr_inactive_pages'.
+       (vm_page_alloc_paused): New variable.
+       (vm_page_pageable, vm_page_can_move, vm_page_remove_mappings): New functions.
+       (vm_page_seg_alloc_from_buddy): Pause allocations and start the pageout
+       daemon as appropriate.
+       (vm_page_queue_init, vm_page_queue_push, vm_page_queue_remove,
+       vm_page_queue_first, vm_page_seg_get, vm_page_seg_index,
+       vm_page_seg_compute_pageout_thresholds): New functions.
+       (vm_page_seg_init): Initialize the new segment members.
+       (vm_page_seg_add_active_page, vm_page_seg_remove_active_page,
+       vm_page_seg_add_inactive_page, vm_page_seg_remove_inactive_page,
+       vm_page_seg_pull_active_page, vm_page_seg_pull_inactive_page,
+       vm_page_seg_pull_cache_page): New functions.
+       (vm_page_seg_min_page_available, vm_page_seg_page_available,
+       vm_page_seg_usable, vm_page_seg_double_lock, vm_page_seg_double_unlock,
+       vm_page_seg_balance_page, vm_page_seg_balance, vm_page_seg_evict,
+       vm_page_seg_compute_high_active_page, vm_page_seg_refill_inactive,
+       vm_page_lookup_seg, vm_page_check): New functions.
+       (vm_page_alloc_pa): Handle allocation failure from VM privileged thread.
+       (vm_page_info_all): Display additional segment properties.
+       (vm_page_wire, vm_page_unwire, vm_page_deactivate, vm_page_activate,
+       vm_page_wait): Move from vm/vm_resident.c and rewrite to use segments.
+       (vm_page_queues_remove, vm_page_check_usable, vm_page_may_balance,
+       vm_page_balance_once, vm_page_balance, vm_page_evict_once): New functions.
+       (VM_PAGE_MAX_LAUNDRY, VM_PAGE_MAX_EVICTIONS): New macros.
+       (vm_page_evict, vm_page_refill_inactive): New functions.
+       * vm/vm_page.h: Include <kern/list.h>.
+       (struct vm_page): Remove member `pageq', reuse the `node' member instead,
+       move the `listq' and `next' members above `vm_page_header'.
+       (VM_PAGE_CHECK): Define as an alias to vm_page_check.
+       (vm_page_check): New function declaration.
+       (vm_page_queue_fictitious, vm_page_queue_active, vm_page_queue_inactive,
+       vm_page_free_target, vm_page_free_min, vm_page_inactive_target,
+       vm_page_free_reserved, vm_page_free_wanted): Remove extern declarations.
+       (vm_page_external_pagedout): New extern declaration.
+       (vm_page_release): Update declaration.
+       (VM_PAGE_QUEUES_REMOVE): Define as an alias to vm_page_queues_remove.
+       (VM_PT_PMAP, VM_PT_KMEM, VM_PT_STACK): Remove macros.
+       (VM_PT_KERNEL): Update value.
+       (vm_page_queues_remove, vm_page_balance, vm_page_evict,
+       vm_page_refill_inactive): New function declarations.
+       * vm/vm_pageout.c (VM_PAGEOUT_BURST_MAX, VM_PAGEOUT_BURST_MIN,
+       VM_PAGEOUT_BURST_WAIT, VM_PAGEOUT_EMPTY_WAIT, VM_PAGEOUT_PAUSE_MAX,
+       VM_PAGE_INACTIVE_TARGET, VM_PAGE_FREE_TARGET, VM_PAGE_FREE_MIN,
+       VM_PAGE_FREE_RESERVED, VM_PAGEOUT_RESERVED_INTERNAL,
+       VM_PAGEOUT_RESERVED_REALLY): Remove macros.
+       (vm_pageout_reserved_internal, vm_pageout_reserved_really,
+       vm_pageout_burst_max, vm_pageout_burst_min, vm_pageout_burst_wait,
+       vm_pageout_empty_wait, vm_pageout_pause_count, vm_pageout_pause_max,
+       vm_pageout_active, vm_pageout_inactive, vm_pageout_inactive_nolock,
+       vm_pageout_inactive_busy, vm_pageout_inactive_absent,
+       vm_pageout_inactive_used, vm_pageout_inactive_clean,
+       vm_pageout_inactive_dirty, vm_pageout_inactive_double,
+       vm_pageout_inactive_cleaned_external): Remove variables.
+       (vm_pageout_requested, vm_pageout_continue): New variables.
+       (vm_pageout_setup): Wait for page allocation to succeed instead of
+       falling back to flush, update double paging protocol with caller,
+       add pageout throttling setup.
+       (vm_pageout_scan): Rewrite to use the new vm_page balancing,
+       eviction and inactive queue refill functions.
+       (vm_pageout_scan_continue, vm_pageout_continue): Remove functions.
+       (vm_pageout): Rewrite.
+       (vm_pageout_start, vm_pageout_resume): New functions.
+       * vm/vm_pageout.h (vm_pageout_continue, vm_pageout_scan_continue): Remove
+       function declarations.
+       (vm_pageout_start, vm_pageout_resume): New function declarations.
+       * vm/vm_resident.c: Include <kern/list.h>.
+       (vm_page_queue_fictitious): Define as a struct list.
+       (vm_page_free_wanted, vm_page_external_count, vm_page_free_avail,
+       vm_page_queue_active, vm_page_queue_inactive, vm_page_free_target,
+       vm_page_free_min, vm_page_inactive_target, vm_page_free_reserved):
+       Remove variables.
+       (vm_page_external_pagedout): New variable.
+       (vm_page_bootstrap): Don't initialize removed variable, update
+       initialization of vm_page_queue_fictitious.
+       (vm_page_replace): Call VM_PAGE_QUEUES_REMOVE where appropriate.
+       (vm_page_remove): Likewise.
+       (vm_page_grab_fictitious): Update to use list_xxx functions.
+       (vm_page_release_fictitious): Likewise.
+       (vm_page_grab): Remove pageout related code.
+       (vm_page_release): Add `laundry' and `external' parameters for
+       pageout throttling.
+       (vm_page_grab_contig): Remove pageout related code.
+       (vm_page_free_contig): Likewise.
+       (vm_page_free): Remove pageout related code, update call to
+       vm_page_release.
+       (vm_page_wait, vm_page_wire, vm_page_unwire, vm_page_deactivate,
+       vm_page_activate): Move to vm/vm_page.c.
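
As an illustration of the per-segment threshold scheme suggested by the
VM_PAGE_SEG_THRESHOLD_{MIN,LOW,HIGH}_{NUM,DENOM} names above, here is a
minimal standalone sketch; the ratios, the floors and the
seg_compute_thresholds() helper are assumptions for illustration only, not
gnumach's actual values.

    /* Hypothetical sketch: per-segment pageout thresholds expressed as
     * fractions (num/denom) of the segment size, clamped to a small floor.
     * The ratios below are invented for illustration. */
    #include <stdio.h>

    struct seg_thresholds {
        unsigned long min_free_pages;   /* below this, only privileged allocations */
        unsigned long low_free_pages;   /* below this, wake the pageout daemon */
        unsigned long high_free_pages;  /* above this, the daemon can sleep again */
    };

    static unsigned long
    seg_threshold(unsigned long nr_pages, unsigned long num,
                  unsigned long denom, unsigned long floor)
    {
        unsigned long n = nr_pages * num / denom;
        return (n < floor) ? floor : n;
    }

    static void
    seg_compute_thresholds(unsigned long nr_pages, struct seg_thresholds *t)
    {
        t->min_free_pages  = seg_threshold(nr_pages, 1, 200, 16);  /* 0.5% */
        t->low_free_pages  = seg_threshold(nr_pages, 1, 100, 32);  /* 1% */
        t->high_free_pages = seg_threshold(nr_pages, 3, 100, 64);  /* 3% */
    }

    int
    main(void)
    {
        struct seg_thresholds t;

        seg_compute_thresholds(262144, &t); /* a 1 GiB segment of 4 KiB pages */
        printf("min=%lu low=%lu high=%lu\n",
               t.min_free_pages, t.low_free_pages, t.high_free_pages);
        return 0;
    }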
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Redefine what an external page is
+       Instead of a "page considered external", which apparently takes into
+       account whether a page is dirty or not, redefine this property to
+       reliably mean "is in an external object".
+
+       This commit mostly deals with the impact of this change on the page
+       allocation interface.
+
+       * i386/intel/pmap.c (pmap_page_table_page_alloc): Update call to
+       vm_page_grab.
+       * kern/slab.c (kmem_pagealloc_physmem): Use vm_page_grab instead of
+       vm_page_grab_contig.
+       (kmem_pagefree_physmem): Use vm_page_release instead of
+       vm_page_free_contig.
+       * linux/dev/glue/block.c (alloc_buffer, device_read): Update call
+       to vm_page_grab.
+       * vm/vm_fault.c (vm_fault_page): Update calls to vm_page_grab and
+       vm_page_convert.
+       * vm/vm_map.c (vm_map_copy_steal_pages): Update call to vm_page_grab.
+       * vm/vm_page.h (struct vm_page): Remove `extcounted' member.
+       (vm_page_external_limit, vm_page_external_count): Remove extern
+       declarations.
+       (vm_page_convert, vm_page_grab): Update declarations.
+       (vm_page_release, vm_page_grab_phys_addr): New function declarations.
+       * vm/vm_pageout.c (VM_PAGE_EXTERNAL_LIMIT): Remove macro.
+       (VM_PAGE_EXTERNAL_TARGET): Likewise.
+       (vm_page_external_target): Remove variable.
+       (vm_pageout_scan): Remove specific handling of external pages.
+       (vm_pageout): Don't set vm_page_external_limit and
+       vm_page_external_target.
+       * vm/vm_resident.c (vm_page_external_limit): Remove variable.
+       (vm_page_insert, vm_page_replace, vm_page_remove): Update external
+       page tracking.
+       (vm_page_convert): Remove `external' parameter.
+       (vm_page_grab): Likewise. Remove specific handling of external pages.
+       (vm_page_grab_phys_addr): Update call to vm_page_grab.
+       (vm_page_release): Remove `external' parameter and remove specific
+       handling of external pages.
+       (vm_page_wait): Remove specific handling of external pages.
+       (vm_page_alloc): Update call to vm_page_grab.
+       (vm_page_free): Update call to vm_page_release.
+       * xen/block.c (device_read): Update call to vm_page_grab.
+       * xen/net.c (device_write): Likewise.
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Replace vm_offset_t with phys_addr_t where appropriate
+       * i386/i386/phys.c (pmap_zero_page, pmap_copy_page, copy_to_phys,
+       copy_from_phys, kvtophys): Use the phys_addr_t type for physical
+       addresses.
+       * i386/intel/pmap.c (pmap_map, pmap_map_bd, pmap_destroy,
+       pmap_remove_range, pmap_page_protect, pmap_enter, pmap_extract,
+       pmap_collect, phys_attribute_clear, phys_attribute_test,
+       pmap_clear_modify, pmap_is_modified, pmap_clear_reference,
+       pmap_is_referenced): Likewise.
+       * i386/intel/pmap.h (pt_entry_t): Unconditionally define as a
+       phys_addr_t.
+       (pmap_zero_page, pmap_copy_page, kvtophys): Use the phys_addr_t
+       type for physical addresses.
+       * vm/pmap.h (pmap_enter, pmap_page_protect, pmap_clear_reference,
+       pmap_is_referenced, pmap_clear_modify, pmap_is_modified,
+       pmap_extract, pmap_map_bd): Likewise.
+       * vm/vm_page.h (vm_page_fictitious_addr): Declare as a phys_addr_t.
+       * vm/vm_resident.c (vm_page_fictitious_addr): Likewise.
+       (vm_page_grab_phys_addr): Change return type to phys_addr_t.
+
+2016-09-21  Richard Braun  <address@hidden>
+
+       Remove phys_first_addr and phys_last_addr global variables
+       The old assumption that all physical memory is directly mapped in
+       kernel space is about to go away. Those variables are directly linked
+       to that assumption.
+
+       * i386/i386/model_dep.h (phys_first_addr): Remove extern declaration.
+       (phys_last_addr): Likewise.
+       * i386/i386/phys.c (pmap_zero_page): Use VM_PAGE_DIRECTMAP_LIMIT
+       instead of phys_last_addr.
+       (pmap_copy_page, copy_to_phys, copy_from_phys): Likewise.
+       * i386/i386/trap.c (user_trap): Remove check against phys_last_addr.
+       * i386/i386at/biosmem.c (biosmem_bootstrap_common): Don't set
+       phys_last_addr.
+       * i386/i386at/mem.c (memmmap): Use vm_page_lookup_pa to determine if
+       a physical address references physical memory.
+       * i386/i386at/model_dep.c (phys_first_addr): Remove variable.
+       (phys_last_addr): Likewise.
+       (pmap_free_pages, pmap_valid_page): Remove functions.
+       * i386/intel/pmap.c: Include i386at/biosmem.h.
+       (pa_index): Turn into an alias for vm_page_table_index.
+       (pmap_bootstrap): Replace uses of phys_first_addr and phys_last_addr
+       as appropriate.
+       (pmap_virtual_space): Use vm_page_table_size instead of phys_first_addr
+       and phys_last_addr to obtain the number of physical pages.
+       (pmap_verify_free): Remove function.
+       (valid_page): Turn this macro into an inline function and rewrite
+       using vm_page_lookup_pa.
+       (pmap_page_table_page_alloc): Build the pmap VM object using
+       vm_page_table_size to determine its size.
+       (pmap_remove_range, pmap_page_protect, phys_attribute_clear,
+       phys_attribute_test): Turn page indexes into unsigned long integers.
+       (pmap_enter): Likewise. In addition, use either vm_page_lookup_pa or
+       biosmem_directmap_end to determine if a physical address references
+       physical memory.
+       * i386/xen/xen.c (hyp_p2m_init): Use vm_page_table_size instead of
+       phys_last_addr to obtain the number of physical pages.
+       * kern/startup.c (phys_first_addr): Remove extern declaration.
+       (phys_last_addr): Likewise.
+       * linux/dev/init/main.c (linux_init): Use vm_page_seg_end with the
+       appropriate segment selector instead of phys_last_addr to determine
+       where high memory starts.
+       * vm/pmap.h: Update requirements description.
+       (pmap_free_pages, pmap_valid_page): Remove declarations.
+       * vm/vm_page.c (vm_page_seg_end, vm_page_boot_table_size,
+       vm_page_table_size, vm_page_table_index): New functions.
+       * vm/vm_page.h (vm_page_seg_end, vm_page_table_size,
+       vm_page_table_index): New function declarations.
+       * vm/vm_resident.c (vm_page_bucket_count, vm_page_hash_mask): Define
+       as unsigned long integers.
+       (vm_page_bootstrap): Compute VP table size based on the page table
+       size instead of the value returned by pmap_free_pages.
+
+2016-09-20  Richard Braun  <address@hidden>
+
+       VM: remove commented out code
+       The vm_page_direct_va, vm_page_direct_pa and vm_page_direct_ptr
+       functions were imported along with the new vm_page module, but
+       never actually used since the kernel already has phystokv and
+       kvtophys functions.
+
+2016-09-16  Richard Braun  <address@hidden>
+
+       VM: improve pageout deadlock workaround
+       Commit 5dd4f67522ad0d49a2cecdb9b109251f546d4dd1 makes VM map entry
+       allocation done with VM privilege, so that a VM map isn't held locked
+       while physical allocations are paused, which may block the default
+       pager during page eviction, causing a system-wide deadlock.
+
+       First, it turns out that map entries aren't the only buffers allocated,
+       and second, their number can't be easily determined, which makes a
+       preallocation strategy very hard to implement.
+
+       This change generalizes the strategy of VM privilege increase when a
+       VM map is locked.
+
+       * device/ds_routines.c (io_done_thread): Use integer values instead
+       of booleans when setting VM privilege.
+       * kern/thread.c (thread_init, thread_wire): Likewise.
+       * vm/vm_pageout.c (vm_pageout): Likewise.
+       * kern/thread.h (struct thread): Turn member `vm_privilege' into an
+       unsigned integer.
+       * vm/vm_map.c (vm_map_lock): New function, where VM privilege is
+       temporarily increased.
+       (vm_map_unlock): New function, where VM privilege is decreased.
+       (_vm_map_entry_create): Remove VM privilege workaround from this
+       function.
+       * vm/vm_map.h (vm_map_lock, vm_map_unlock): Turn into functions.
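
The vm_map_lock/vm_map_unlock change above can be pictured with the following
minimal standalone sketch; the thread structure, current_thread() and the
pthread-based lock are stand-ins for illustration, not gnumach's actual
definitions.

    /* Sketch of the "raise VM privilege while a map is locked" pattern.
     * All types and helpers here are illustrative stand-ins. */
    #include <assert.h>
    #include <pthread.h>

    struct thread {
        unsigned int vm_privilege;  /* a counter, not a boolean, so uses nest */
    };

    struct vm_map {
        pthread_mutex_t lock;
    };

    static __thread struct thread current_thread_tls;

    static struct thread *
    current_thread(void)
    {
        return &current_thread_tls;
    }

    void
    vm_map_lock(struct vm_map *map)
    {
        /* Raise VM privilege first, so allocations made while the map is
         * held may dip into the reserved pool and cannot deadlock against
         * the pageout path. */
        current_thread()->vm_privilege++;
        pthread_mutex_lock(&map->lock);
    }

    void
    vm_map_unlock(struct vm_map *map)
    {
        pthread_mutex_unlock(&map->lock);
        assert(current_thread()->vm_privilege > 0);
        current_thread()->vm_privilege--;
    }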
+
+2016-09-11  Samuel Thibault  <address@hidden>
+
+       Fix spurious warning
+       * i386/i386/db_trace.c (db_i386_stack_trace): Do not check for frame
+       validity if it is 0.
+
+       Fix size of functions interrupt and syscall
+       * i386/i386/locore.S (syscall): Add END(syscall).
+       * i386/i386at/interrupt.S (interrupt): Add END(interrupt).
+
+       Set function type on symbols created by ENTRY macro
+       * i386/include/mach/i386/asm.h (ENTRY, ENTRY2, ASENTRY, Entry): Use .type
+       @function on created entries.
+
+       Close parenthesis
+       * i386/i386/db_trace.c (db_i386_stack_trace): When stopping on zero frame,
+       close parameters parenthesis.
+
+       Fix exploring stack trace up to assembly
+       * i386/i386/db_trace.c (db_i386_stack_trace): Do not stop as soon as frame
+       is 0, lookup PC first, and stop only before accessing the frame content.
+
+2016-09-11  Justus Winter  <address@hidden>
+
+       ipc: Fix crash in debug code.
+       * ipc/mach_debug.c (mach_port_kernel_object): Check that the receiver
+       is valid.
+
+2016-09-07  Richard Braun  <address@hidden>
+
+       Remove map entry pageability property.
+       Since the replacement of the zone allocator, kernel objects have been
+       wired in memory. Besides, as of 5e9f6f (Stack the slab allocator
+       directly on top of the physical allocator), there is a single cache
+       used to allocate map entries.
+
+       Those changes make the pageability attribute of VM maps irrelevant.
+
+       * device/ds_routines.c (mach_device_init): Update call to kmem_submap.
+       * ipc/ipc_init.c (ipc_init): Likewise.
+       * kern/task.c (task_create): Update call to vm_map_create.
+       * vm/vm_kern.c (kmem_submap): Remove `pageable' argument. Update call
+       to vm_map_setup.
+       (kmem_init): Update call to vm_map_setup.
+       * vm/vm_kern.h (kmem_submap): Update declaration.
+       * vm/vm_map.c (vm_map_setup): Remove `pageable' argument. Don't set
+       `entries_pageable' member.
+       (vm_map_create): Likewise.
+       (vm_map_copyout): Don't bother creating copies of page entries with
+       the right pageability.
+       (vm_map_copyin): Don't set `entries_pageable' member.
+       (vm_map_fork): Update call to vm_map_create.
+       * vm/vm_map.h (struct vm_map_header): Remove `entries_pageable' member.
+       (vm_map_setup, vm_map_create): Remove `pageable' argument.
+
+2016-09-06  Richard Braun  <address@hidden>
+
+       Fix registration of strings in boot data
+       * i386/i386at/model_dep.c (register_boot_data): Use phystokv on strings
+       when computing their length.
+
+2016-09-06  Richard Braun  <address@hidden>
+
+       Make early physical page allocation truly reliable
+       Import upstream biosmem changes and adjust for local modifications.
+
+       Specifically, this change makes the biosmem module reliably track all
+       boot data by storing their addresses in a sorted array. This allows
+       both the early page allocator and the biosmem_free_usable function
+       to accurately find any range of free pages.
+
+       * i386/i386at/biosmem.c: Remove inclusion of <i386at/elf.h>.
+       (_start, _end): Remove variable declarations.
+       (BIOSMEM_MAX_BOOT_DATA): New macro.
+       (struct biosmem_boot_data): New type.
+       (biosmem_boot_data_array, biosmem_nr_boot_data): New variables.
+       (biosmem_heap_start, biosmem_heap_bottom, biosmem_heap_top,
+       biosmem_heap_end): Change type to phys_addr_t.
+       (biosmem_panic_inval_boot_data): New variable.
+       (biosmem_panic_too_many_boot_data): Likewise.
+       (biosmem_panic_toobig_msg): Variable renamed ...
+       (biosmem_panic_too_big_msg): ... to this.
+       (biosmem_register_boot_data): New function.
+       (biosmem_unregister_boot_data): Likewise.
+       (biosmem_map_adjust): Update reference to panic message.
+       (biosmem_map_find_avail): Add detailed description.
+       (biosmem_save_cmdline_sizes): Remove function.
+       (biosmem_find_heap_clip): Likewise.
+       (biosmem_find_heap): Likewise.
+       (biosmem_find_avail_clip, biosmem_find_avail): New functions.
+       (biosmem_setup_allocator): Receive const multiboot info, replace
+       calls to biosmem_find_heap with calls to biosmem_find_avail and
+       update accordingly. Register the heap as boot data.
+       (biosmem_xen_bootstrap): Register the Xen boot info and the heap as
+       boot data.
+       (biosmem_bootstrap): Receive const multiboot information. Remove call
+       to biosmem_save_cmdline_sizes.
+       (biosmem_bootalloc): Remove assertion on the VM system state.
+       (biosmem_type_desc, biosmem_map_show): Build only if DEBUG is true.
+       (biosmem_unregister_temporary_boot_data): New function.
+       (biosmem_free_usable_range): Change address range format.
+       (biosmem_free_usable_entry): Rewrite to use biosmem_find_avail
+       without abusing it.
+       (biosmem_free_usable): Call biosmem_unregister_temporary_boot_data,
+       update call to biosmem_free_usable_entry.
+       * i386/i386at/biosmem.h (biosmem_register_boot_data): New function.
+       (biosmem_bootalloc): Update description.
+       (biosmem_bootstrap): Update description and declaration.
+       (biosmem_free_usable): Likewise.
+       * i386/i386at/model_dep.c: Include <i386at/elf.h>.
+       (machine_init): Update call to biosmem_free_usable.
+       (register_boot_data): New function.
+       (i386at_init): Call register_boot_data where appropriate.
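
The sorted boot-data bookkeeping described above can be sketched as follows;
the structure layout, the array size and the error handling are assumptions
based only on the names in this entry, not gnumach's actual code.

    /* Standalone sketch: keep every boot data range in an array sorted by
     * start address, so the early allocator can find the free gaps between
     * them.  All names and sizes here are illustrative. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef uint64_t phys_addr_t;

    #define BOOT_DATA_MAX 64

    struct boot_data {
        phys_addr_t start;
        phys_addr_t end;
        bool temporary;     /* may be released once the VM system is up */
    };

    static struct boot_data boot_data_array[BOOT_DATA_MAX];
    static unsigned int nr_boot_data;

    static void
    register_boot_data(phys_addr_t start, phys_addr_t end, bool temporary)
    {
        unsigned int i, j;

        if (start >= end) {
            fprintf(stderr, "invalid boot data range\n");
            abort();
        }

        if (nr_boot_data == BOOT_DATA_MAX) {
            fprintf(stderr, "too many boot data ranges\n");
            abort();
        }

        /* Find the insertion point that keeps the array sorted by start. */
        for (i = 0; i < nr_boot_data; i++)
            if (start < boot_data_array[i].start)
                break;

        /* Shift the tail up by one slot and insert. */
        for (j = nr_boot_data; j > i; j--)
            boot_data_array[j] = boot_data_array[j - 1];

        boot_data_array[i] = (struct boot_data){ start, end, temporary };
        nr_boot_data++;
    }

    int
    main(void)
    {
        unsigned int i;

        register_boot_data(0x100000, 0x200000, false); /* e.g. the kernel image */
        register_boot_data(0x9f000, 0xa0000, true);    /* e.g. a temporary heap */

        for (i = 0; i < nr_boot_data; i++)
            printf("[%llx, %llx) temporary=%d\n",
                   (unsigned long long)boot_data_array[i].start,
                   (unsigned long long)boot_data_array[i].end,
                   (int)boot_data_array[i].temporary);
        return 0;
    }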
+
+2016-09-03  Richard Braun  <address@hidden>
+
+       Fix early physical page allocation
+       Import upstream biosmem and vm_page changes, and adjust for local
+       modifications.
+
+       Specifically, the biosmem module was mistakenly loading physical
+       segments that did not clip with the heap as completely available.
+       This change makes it load them as completely unavailable during
+       startup, and once the VM system is ready, additional pages are
+       loaded.
+
+       * i386/i386at/biosmem.c (DEBUG): New macro.
+       (struct biosmem_segment): Remove members `avail_start' and `avail_end'.
+       (biosmem_heap_cur): Remove variable.
+       (biosmem_heap_bottom, biosmem_heap_top, biosmem_heap_topdown): New variables.
+       (biosmem_find_boot_data_update, biosmem_find_boot_data): Remove functions.
+       (biosmem_find_heap_clip, biosmem_find_heap): New functions.
+       (biosmem_setup_allocator): Rewritten to use the new biosmem_find_heap
+       function.
+       (biosmem_bootalloc): Support both bottom-up and top-down allocations.
+       (biosmem_directmap_size): Renamed to ...
+       (biosmem_directmap_end): ... this function.
+       (biosmem_load_segment): Fix segment loading.
+       (biosmem_setup): Restrict usable memory to the directmap segment.
+       (biosmem_free_usable_range): Add checks on input parameters.
+       (biosmem_free_usable_update_start, biosmem_free_usable_start,
+       biosmem_free_usable_reserved, biosmem_free_usable_end): Remove functions.
+       (biosmem_free_usable_entry): Rewritten to use the new biosmem_find_heap
+       function.
+       (biosmem_free_usable): Restrict usable memory to the directmap segment.
+       * i386/i386at/biosmem.h (biosmem_bootalloc): Update description.
+       (biosmem_directmap_size): Renamed to ...
+       (biosmem_directmap_end): ... this function.
+       (biosmem_free_usable): Update declaration.
+       * i386/i386at/model_dep.c (machine_init): Call biosmem_free_usable.
+       * vm/vm_page.c (DEBUG): New macro.
+       (struct vm_page_seg): New member `heap_present'.
+       (vm_page_load): Remove heap related parameters.
+       (vm_page_load_heap): New function.
+       * vm/vm_page.h (vm_page_load): Remove heap related parameters. Update
+       description.
+       (vm_page_load_heap): New function.
+
+2016-09-01  Richard Braun  <address@hidden>
+
+       pmap: fix map window creation on xen
+       * i386/intel/pmap.c (pmap_get_mapwindow, pmap_put_mapwindow): Use
+       the appropriate xen hypercall if building for paravirtualized page
+       table management.
+
+2016-08-31  Samuel Thibault  <address@hidden>
+
+       Avoid using non-ascii source encoding
+       * xen/console.c (hypcnintr): Replace latin1 £ character with the 0xA3
+       number.
+
+2016-08-29  Richard Braun  <address@hidden>
+
+       vm: fix boot on xen
+       * vm/vm_map.c (_vm_map_entry_create): Make sure there is a thread
+       before accessing VM privilege.
+
+2016-08-26  Samuel Thibault  <address@hidden>
+
+       Revert "Fix documentation for vm_map"
+       This reverts commit 57694037a02dda29bd678dc3b8531bd437682ba7.
+
+       We rather prefer the kernel just use whatever slot it sees fit. Userland has
+       already been fixed into not using the behavior anyway.
+
+2016-08-25  Samuel Thibault  <address@hidden>
+
+       Add missing memory barriers in simple lock debugging
+       * kern/lock.c (_simple_lock, _simple_lock_try, simple_unlock): Add compiler
+       memory barrier to separate simple_locks_taken update from information
+       filling.
+
+       Use invlpg for single-page pagetable changes
+       * i386/intel/pmap.c (INVALIDATE_TLB): When e-s is constant, equal to
+       PAGE_SIZE, use just one invlpg instruction to flush the TLB.
+
+       Drop unused macro
+       * i386/intel/pmap.c (MAX_TBIS_SIZE): Drop unused macro.
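
For reference, the two TLB flush forms involved in the invlpg change, and the
compiler barrier used in the lock debugging change, look roughly as follows.
Both TLB flushes are privileged instructions, and gnumach's actual
INVALIDATE_TLB macro and barrier differ in detail, so this is only a sketch.

    /* Flush the single TLB entry covering the page containing addr. */
    static inline void
    flush_one_page(unsigned long addr)
    {
        asm volatile("invlpg (%0)" : : "r" (addr) : "memory");
    }

    /* Flush all non-global TLB entries by reloading CR3. */
    static inline void
    flush_all(void)
    {
        unsigned long cr3;

        asm volatile("mov %%cr3, %0\n\tmov %0, %%cr3"
                     : "=r" (cr3) : : "memory");
    }

    /* Compiler-only memory barrier: no instruction is emitted, but the
     * compiler may not reorder memory accesses across it. */
    #define barrier()  asm volatile("" : : : "memory")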
+
+2016-08-16  Richard Braun  <address@hidden>
+
+       Replace libc string functions with internal implementations
+       * Makefile.am (clib_routines): Remove memcmp, memcpy, memmove,
+       strchr, strstr and strsep.
+       * kern/strings.c (memset): Comment out.
+       (strchr, strsep, strstr): New functions.
+
+       i386: import string functions from X15 and relicense to GPLv2+
+       * i386/Makefrag.am (libkernel_a_SOURCES): Add i386/i386/strings.c.
+       * i386/i386/strings.c: New file.
+
+2016-08-12  Richard Braun  <address@hidden>
+
+       i386: request the boot loader to page-align modules
+       * i386/i386at/boothdr.S (MULTIBOOT_FLAGS): Set LSB bit.
+
 2016-08-07  Richard Braun  <address@hidden>
 
        VM: fix pageout-related deadlock
diff --git a/Makefile.am b/Makefile.am
index bbcfc11..50ff6b6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -161,9 +161,7 @@ noinst_PROGRAMS += \
        gnumach.o
 
 # This is the list of routines we decide is OK to steal from the C library.
-clib_routines := memcmp memcpy memmove                         \
-                strchr strstr strsep strtok                    \
-                htonl htons ntohl ntohs                        \
+clib_routines := htonl htons ntohl ntohs                       \
                 udivdi3 __udivdi3 __umoddi3                    \
                 __rel_iplt_start __rel_iplt_end                \
                 __ffsdi2                                       \
diff --git a/Makefile.in b/Makefile.in
index f38eef3..cf0fa1b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1076,6 +1076,7 @@ noinst_PROGRAMS = gnumach.o$(EXEEXT)
 @HOST_ix86_TRUE@       i386/i386/setjmp.h \
 @HOST_ix86_TRUE@       i386/i386/spl.S \
 @HOST_ix86_TRUE@       i386/i386/spl.h \
address@hidden@ i386/i386/strings.c \
 @HOST_ix86_TRUE@       i386/i386/task.h \
 @HOST_ix86_TRUE@       i386/i386/thread.h \
 @HOST_ix86_TRUE@       i386/i386/time_stamp.h \
@@ -1353,16 +1354,17 @@ am__libkernel_a_SOURCES_DIST = ddb/db_access.c ddb/db_access.h \
        i386/i386/pio.h i386/i386/pmap.h i386/i386/proc_reg.h \
        i386/i386/sched_param.h i386/i386/seg.c i386/i386/seg.h \
        i386/i386/setjmp.h i386/i386/spl.S i386/i386/spl.h \
-       i386/i386/task.h i386/i386/thread.h i386/i386/time_stamp.h \
-       i386/i386/trap.c i386/i386/trap.h i386/i386/tss.h \
-       i386/i386/user_ldt.c i386/i386/user_ldt.h i386/i386/vm_param.h \
-       i386/i386/xpr.h i386/intel/pmap.c i386/intel/pmap.h \
-       i386/intel/read_fault.c i386/intel/read_fault.h \
-       i386/i386/hardclock.c i386/i386/hardclock.h i386/i386/io_map.c \
-       i386/i386/pic.c i386/i386/pic.h i386/i386/pit.c \
-       i386/i386/pit.h i386/i386/_setjmp.S chips/busses.c \
-       chips/busses.h device/cirbuf.c i386/xen/xen.c \
-       i386/xen/xen_locore.S i386/xen/xen_boothdr.S i386/i386/xen.h
+       i386/i386/strings.c i386/i386/task.h i386/i386/thread.h \
+       i386/i386/time_stamp.h i386/i386/trap.c i386/i386/trap.h \
+       i386/i386/tss.h i386/i386/user_ldt.c i386/i386/user_ldt.h \
+       i386/i386/vm_param.h i386/i386/xpr.h i386/intel/pmap.c \
+       i386/intel/pmap.h i386/intel/read_fault.c \
+       i386/intel/read_fault.h i386/i386/hardclock.c \
+       i386/i386/hardclock.h i386/i386/io_map.c i386/i386/pic.c \
+       i386/i386/pic.h i386/i386/pit.c i386/i386/pit.h \
+       i386/i386/_setjmp.S chips/busses.c chips/busses.h \
+       device/cirbuf.c i386/xen/xen.c i386/xen/xen_locore.S \
+       i386/xen/xen_boothdr.S i386/i386/xen.h
 @address@hidden = ddb/db_access.$(OBJEXT) \
 @enable_kdb_TRUE@      ddb/db_aout.$(OBJEXT) ddb/db_elf.$(OBJEXT) \
 @enable_kdb_TRUE@      ddb/db_break.$(OBJEXT) \
@@ -1433,6 +1435,7 @@ am__libkernel_a_SOURCES_DIST = ddb/db_access.c ddb/db_access.h \
 @HOST_ix86_TRUE@       i386/i386/phys.$(OBJEXT) \
 @HOST_ix86_TRUE@       i386/i386/seg.$(OBJEXT) \
 @HOST_ix86_TRUE@       i386/i386/spl.$(OBJEXT) \
address@hidden@ i386/i386/strings.$(OBJEXT) \
 @HOST_ix86_TRUE@       i386/i386/trap.$(OBJEXT) \
 @HOST_ix86_TRUE@       i386/i386/user_ldt.$(OBJEXT) \
 @HOST_ix86_TRUE@       i386/intel/pmap.$(OBJEXT) \
@@ -2941,9 +2944,7 @@ gnumach_o_SOURCES =
 gnumach_o_LINK = $(LD) -u _start -r -o $@
 
 # This is the list of routines we decide is OK to steal from the C library.
-clib_routines := memcmp memcpy memmove                         \
-                strchr strstr strsep strtok                    \
-                htonl htons ntohl ntohs                        \
+clib_routines := htonl htons ntohl ntohs                       \
                 udivdi3 __udivdi3 __umoddi3                    \
                 __rel_iplt_start __rel_iplt_end                \
                 __ffsdi2                                       \
@@ -3422,6 +3423,8 @@ i386/i386/seg.$(OBJEXT): i386/i386/$(am__dirstamp) \
        i386/i386/$(DEPDIR)/$(am__dirstamp)
 i386/i386/spl.$(OBJEXT): i386/i386/$(am__dirstamp) \
        i386/i386/$(DEPDIR)/$(am__dirstamp)
+i386/i386/strings.$(OBJEXT): i386/i386/$(am__dirstamp) \
+       i386/i386/$(DEPDIR)/$(am__dirstamp)
 i386/i386/trap.$(OBJEXT): i386/i386/$(am__dirstamp) \
        i386/i386/$(DEPDIR)/$(am__dirstamp)
 i386/i386/user_ldt.$(OBJEXT): i386/i386/$(am__dirstamp) \
@@ -4242,6 +4245,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
address@hidden@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/i386at/$(DEPDIR)/address@hidden@
diff --git a/configure b/configure
index b529a51..282252c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for GNU Mach 1.7+git20160809.
+# Generated by GNU Autoconf 2.69 for GNU Mach 1.7+git20160921.
 #
 # Report bugs to <address@hidden>.
 #
@@ -579,8 +579,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='GNU Mach'
 PACKAGE_TARNAME='gnumach'
-PACKAGE_VERSION='1.7+git20160809'
-PACKAGE_STRING='GNU Mach 1.7+git20160809'
+PACKAGE_VERSION='1.7+git20160921'
+PACKAGE_STRING='GNU Mach 1.7+git20160921'
 PACKAGE_BUGREPORT='address@hidden'
 PACKAGE_URL=''
 
@@ -1599,7 +1599,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures GNU Mach 1.7+git20160809 to adapt to many kinds of systems.
+\`configure' configures GNU Mach 1.7+git20160921 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1670,7 +1670,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of GNU Mach 1.7+git20160809:";;
+     short | recursive ) echo "Configuration of GNU Mach 1.7+git20160921:";;
    esac
   cat <<\_ACEOF
 
@@ -2026,7 +2026,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-GNU Mach configure 1.7+git20160809
+GNU Mach configure 1.7+git20160921
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2118,7 +2118,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by GNU Mach $as_me 1.7+git20160809, which was
+It was created by GNU Mach $as_me 1.7+git20160921, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2984,7 +2984,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gnumach'
- VERSION='1.7+git20160809'
+ VERSION='1.7+git20160921'
 
 
 # Some tools Automake needs.
@@ -12189,7 +12189,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by GNU Mach $as_me 1.7+git20160809, which was
+This file was extended by GNU Mach $as_me 1.7+git20160921, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -12260,7 +12260,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-GNU Mach config.status 1.7+git20160809
+GNU Mach config.status 1.7+git20160921
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/device/ds_routines.c b/device/ds_routines.c
index 6b6dcb0..1fabec3 100644
--- a/device/ds_routines.c
+++ b/device/ds_routines.c
@@ -1512,7 +1512,7 @@ void io_done_thread(void)
        /*
         * Set thread privileges and highest priority.
         */
-       current_thread()->vm_privilege = TRUE;
+       current_thread()->vm_privilege = 1;
        stack_privilege(current_thread());
        thread_set_own_priority(0);
 
@@ -1532,7 +1532,7 @@ void mach_device_init(void)
        simple_lock_init(&io_done_list_lock);
 
        kmem_submap(device_io_map, kernel_map, &device_io_min, &device_io_max,
-                   DEVICE_IO_MAP_SIZE, FALSE);
+                   DEVICE_IO_MAP_SIZE);
 
        /*
         *      If the kernel receives many device_write requests, the
diff --git a/doc/mach.info b/doc/mach.info
index a728886..dc3faa6 100644
--- a/doc/mach.info
+++ b/doc/mach.info
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
 
 This file documents the GNU Mach microkernel.
 
-   This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+   This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
 
    Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
 Inc.
@@ -39,126 +39,126 @@ END-INFO-DIR-ENTRY
 
 
 Indirect:
-mach.info-1: 1639
-mach.info-2: 302600
+mach.info-1: 1640
+mach.info-2: 302586
 
 Tag Table:
 (Indirect)
-Node: Top1639
-Node: Introduction11276
-Node: Audience12107
-Node: Features13142
-Node: Overview14969
-Node: History16162
-Node: Installing16307
-Node: Binary Distributions17532
-Node: Compilation18340
-Node: Configuration19573
-Node: Cross-Compilation35984
-Node: Bootstrap36765
-Ref: Bootstrap-Footnote-137208
-Node: Bootloader37445
-Ref: Bootloader-Footnote-138725
-Node: Modules38811
-Node: Inter Process Communication39638
-Node: Major Concepts40261
-Node: Messaging Interface44066
-Node: Mach Message Call44796
-Node: Message Format48111
-Node: Exchanging Port Rights59303
-Ref: Exchanging Port Rights-Footnote-164865
-Node: Memory65037
-Ref: Memory-Footnote-168131
-Node: Message Send68473
-Ref: Message Send-Footnote-175495
-Node: Message Receive75778
-Ref: Message Receive-Footnote-185430
-Node: Atomicity85711
-Node: Port Manipulation Interface88485
-Node: Port Creation90040
-Node: Port Destruction94829
-Node: Port Names97972
-Node: Port Rights102219
-Node: Ports and other Tasks106008
-Node: Receive Rights110101
-Node: Port Sets117032
-Node: Request Notifications119435
-Node: Inherited Ports124239
-Node: Virtual Memory Interface127923
-Node: Memory Allocation129176
-Node: Memory Deallocation131701
-Node: Data Transfer133165
-Node: Memory Attributes136691
-Node: Mapping Memory Objects146130
-Node: Memory Statistics149439
-Node: External Memory Management151013
-Node: Memory Object Server151718
-Node: Memory Object Creation154427
-Node: Memory Object Termination158475
-Node: Memory Objects and Data161414
-Node: Memory Object Locking178560
-Node: Memory Object Attributes184455
-Node: Default Memory Manager190292
-Node: Threads and Tasks196014
-Node: Thread Interface196351
-Node: Thread Creation197347
-Node: Thread Termination198464
-Node: Thread Information198935
-Node: Thread Settings205034
-Node: Thread Execution206268
-Node: Scheduling213561
-Node: Thread Priority213916
-Node: Hand-Off Scheduling216550
-Node: Scheduling Policy221675
-Node: Thread Special Ports223007
-Node: Exceptions225453
-Node: Task Interface226323
-Node: Task Creation227335
-Node: Task Termination228670
-Node: Task Information229272
-Node: Task Execution236174
-Node: Task Special Ports240587
-Node: Syscall Emulation244441
-Node: Profiling245672
-Node: Host Interface249435
-Node: Host Ports250420
-Node: Host Information252493
-Node: Host Time257876
-Node: Host Reboot260543
-Node: Processors and Processor Sets261095
-Node: Processor Set Interface262073
-Node: Processor Set Ports262840
-Node: Processor Set Access263670
-Node: Processor Set Creation265930
-Node: Processor Set Destruction266957
-Node: Tasks and Threads on Sets267878
-Node: Processor Set Priority273045
-Node: Processor Set Policy274335
-Node: Processor Set Info275949
-Node: Processor Interface279762
-Node: Hosted Processors280487
-Node: Processor Control281478
-Node: Processors and Sets282944
-Node: Processor Info284822
-Node: Device Interface287564
-Node: Device Reply Server289179
-Node: Device Open290471
-Node: Device Close292594
-Node: Device Read293173
-Node: Device Write296092
-Node: Device Map298897
-Node: Device Status299788
-Node: Device Filter302600
-Node: Kernel Debugger308347
-Node: Operation309074
-Node: Commands312051
-Node: Variables325836
-Node: Expressions327224
-Node: Copying328573
-Node: Documentation License347802
-Node: GNU Free Documentation License348391
-Node: CMU License370790
-Node: Concept Index372025
-Node: Function and Data Index375871
+Node: Top1640
+Node: Introduction11278
+Node: Audience12109
+Node: Features13144
+Node: Overview14971
+Node: History16164
+Node: Installing16309
+Node: Binary Distributions17534
+Node: Compilation18342
+Node: Configuration19575
+Node: Cross-Compilation35986
+Node: Bootstrap36767
+Ref: Bootstrap-Footnote-137210
+Node: Bootloader37447
+Ref: Bootloader-Footnote-138727
+Node: Modules38813
+Node: Inter Process Communication39640
+Node: Major Concepts40263
+Node: Messaging Interface44068
+Node: Mach Message Call44798
+Node: Message Format48113
+Node: Exchanging Port Rights59305
+Ref: Exchanging Port Rights-Footnote-164867
+Node: Memory65039
+Ref: Memory-Footnote-168133
+Node: Message Send68475
+Ref: Message Send-Footnote-175497
+Node: Message Receive75780
+Ref: Message Receive-Footnote-185432
+Node: Atomicity85713
+Node: Port Manipulation Interface88487
+Node: Port Creation90042
+Node: Port Destruction94831
+Node: Port Names97974
+Node: Port Rights102221
+Node: Ports and other Tasks106010
+Node: Receive Rights110103
+Node: Port Sets117034
+Node: Request Notifications119437
+Node: Inherited Ports124241
+Node: Virtual Memory Interface127925
+Node: Memory Allocation129178
+Node: Memory Deallocation131703
+Node: Data Transfer133167
+Node: Memory Attributes136693
+Node: Mapping Memory Objects146132
+Node: Memory Statistics149424
+Node: External Memory Management150998
+Node: Memory Object Server151703
+Node: Memory Object Creation154412
+Node: Memory Object Termination158460
+Node: Memory Objects and Data161399
+Node: Memory Object Locking178545
+Node: Memory Object Attributes184440
+Node: Default Memory Manager190277
+Node: Threads and Tasks195999
+Node: Thread Interface196336
+Node: Thread Creation197332
+Node: Thread Termination198449
+Node: Thread Information198920
+Node: Thread Settings205019
+Node: Thread Execution206253
+Node: Scheduling213546
+Node: Thread Priority213901
+Node: Hand-Off Scheduling216535
+Node: Scheduling Policy221660
+Node: Thread Special Ports222992
+Node: Exceptions225438
+Node: Task Interface226308
+Node: Task Creation227320
+Node: Task Termination228655
+Node: Task Information229257
+Node: Task Execution236159
+Node: Task Special Ports240572
+Node: Syscall Emulation244426
+Node: Profiling245657
+Node: Host Interface249420
+Node: Host Ports250405
+Node: Host Information252478
+Node: Host Time257861
+Node: Host Reboot260528
+Node: Processors and Processor Sets261080
+Node: Processor Set Interface262058
+Node: Processor Set Ports262825
+Node: Processor Set Access263655
+Node: Processor Set Creation265915
+Node: Processor Set Destruction266942
+Node: Tasks and Threads on Sets267863
+Node: Processor Set Priority273030
+Node: Processor Set Policy274320
+Node: Processor Set Info275934
+Node: Processor Interface279747
+Node: Hosted Processors280472
+Node: Processor Control281463
+Node: Processors and Sets282929
+Node: Processor Info284807
+Node: Device Interface287549
+Node: Device Reply Server289164
+Node: Device Open290456
+Node: Device Close292579
+Node: Device Read293158
+Node: Device Write296077
+Node: Device Map298882
+Node: Device Status299773
+Node: Device Filter302586
+Node: Kernel Debugger308333
+Node: Operation309060
+Node: Commands312037
+Node: Variables325822
+Node: Expressions327210
+Node: Copying328559
+Node: Documentation License347788
+Node: GNU Free Documentation License348377
+Node: CMU License370776
+Node: Concept Index372011
+Node: Function and Data Index375857
 
 End Tag Table
diff --git a/doc/mach.info-1 b/doc/mach.info-1
index aabe01d..32bc7c3 100644
--- a/doc/mach.info-1
+++ b/doc/mach.info-1
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
 
 This file documents the GNU Mach microkernel.
 
-   This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+   This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
 
    Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
 Inc.
@@ -45,8 +45,8 @@ Main Menu
 
 This file documents the GNU Mach microkernel.
 
-   This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+   This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
 
    Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
 Inc.
@@ -3347,14 +3347,14 @@ File: mach.info,  Node: Mapping Memory Objects,  Next: Memory Statistics,  Prev:
      memory exception.
 
      TARGET_TASK is the task to be affected.  The starting address is
-     ADDRESS.  If the ANYWHERE option is used, this address is used as a
-     starting hint.  The address actually allocated will be returned in
-     ADDRESS.  SIZE is the number of bytes to allocate (rounded by the
-     system in a machine dependent way).  The alignment restriction is
-     specified by MASK.  Bits asserted in this mask must not be asserted
-     in the address returned.  If ANYWHERE is set, the kernel should
-     find and allocate any region of the specified size, and return the
-     address of the resulting region in ADDRESS.
+     ADDRESS.  If the ANYWHERE option is used, this address is ignored.
+     The address actually allocated will be returned in ADDRESS.  SIZE
+     is the number of bytes to allocate (rounded by the system in a
+     machine dependent way).  The alignment restriction is specified by
+     MASK.  Bits asserted in this mask must not be asserted in the
+     address returned.  If ANYWHERE is set, the kernel should find and
+     allocate any region of the specified size, and return the address
+     of the resulting region in ADDRESS.
 
      MEMORY_OBJECT is the port that represents the memory object: used
      by user tasks in 'vm_map'; used by the make requests for data or
diff --git a/doc/mach.info-2 b/doc/mach.info-2
index 847cb6d..78d8dd1 100644
--- a/doc/mach.info-2
+++ b/doc/mach.info-2
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
 
 This file documents the GNU Mach microkernel.
 
-   This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+   This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
 
    Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
 Inc.
diff --git a/doc/mach.texi b/doc/mach.texi
index 0aeed76..98f72fa 100644
--- a/doc/mach.texi
+++ b/doc/mach.texi
@@ -3367,7 +3367,7 @@ exception.
 
 @var{target_task} is the task to be affected.  The starting address is
 @var{address}.  If the @var{anywhere} option is used, this address is
-used as a starting hint.  The address actually allocated will be returned in
+ignored.  The address actually allocated will be returned in
 @var{address}. @var{size} is the number of bytes to allocate (rounded by
 the system in a machine dependent way).  The alignment restriction is
 specified by @var{mask}.  Bits asserted in this mask must not be
diff --git a/doc/stamp-vti b/doc/stamp-vti
index 5bfda11..165f9b3 100644
--- a/doc/stamp-vti
+++ b/doc/stamp-vti
@@ -1,4 +1,4 @@
address@hidden UPDATED 20 April 2016
address@hidden UPDATED-MONTH April 2016
address@hidden EDITION 1.7+git20160809
address@hidden VERSION 1.7+git20160809
address@hidden UPDATED 31 August 2016
address@hidden UPDATED-MONTH August 2016
address@hidden EDITION 1.7+git20160921
address@hidden VERSION 1.7+git20160921
diff --git a/doc/version.texi b/doc/version.texi
index 5bfda11..165f9b3 100644
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,4 +1,4 @@
address@hidden UPDATED 20 April 2016
address@hidden UPDATED-MONTH April 2016
address@hidden EDITION 1.7+git20160809
address@hidden VERSION 1.7+git20160809
address@hidden UPDATED 31 August 2016
address@hidden UPDATED-MONTH August 2016
address@hidden EDITION 1.7+git20160921
address@hidden VERSION 1.7+git20160921
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index c61a3f6..90f20fb 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -151,6 +151,7 @@ libkernel_a_SOURCES += \
        i386/i386/setjmp.h \
        i386/i386/spl.S \
        i386/i386/spl.h \
+       i386/i386/strings.c \
        i386/i386/task.h \
        i386/i386/thread.h \
        i386/i386/time_stamp.h \
diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c
index c8789e7..898feba 100644
--- a/i386/i386/db_trace.c
+++ b/i386/i386/db_trace.c
@@ -431,7 +431,7 @@ db_i386_stack_trace(
        }
 
        lastframe = 0;
-       while (count-- && frame != 0) {
+       while (count--) {
            int         narg;
            char *      name;
            db_expr_t   offset;
@@ -459,9 +459,12 @@ db_i386_stack_trace(
                    goto next_frame;
                } else {
                    frame_type = 0;
-                   narg = db_numargs(frame, task);
+                   if (frame)
+                       narg = db_numargs(frame, task);
+                   else
+                       narg = -1;
                }
-           } else if (INKERNEL(callpc) ^ INKERNEL(frame)) {
+           } else if (!frame || INKERNEL(callpc) ^ INKERNEL(frame)) {
                frame_type = 0;
                narg = -1;
            } else {
@@ -477,6 +480,10 @@ db_i386_stack_trace(
            } else
                db_printf("%s(", name);
 
+           if (!frame) {
+               db_printf(")\n");
+               break;
+           }
            argp = &frame->f_arg0;
            while (narg > 0) {
                db_printf("%x", 
db_get_task_value((long)argp,sizeof(long),FALSE,task));
@@ -501,10 +508,6 @@ db_i386_stack_trace(
        next_frame:
            db_nextframe(&lastframe, &frame, &callpc, frame_type, th);
 
-           if (frame == 0) {
-               /* end of chain */
-               break;
-           }
            if (!INKERNEL(lastframe) ||
                (!INKERNEL(callpc) && !INKERNEL(frame)))
                user_frame++;
@@ -513,7 +516,7 @@ db_i386_stack_trace(
                if (kernel_only)
                    break;
            }
-           if (frame <= lastframe) {
+           if (frame && frame <= lastframe) {
                if (INKERNEL(lastframe) && !INKERNEL(frame))
                    continue;
                db_printf("Bad frame pointer: 0x%x\n", frame);
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index c715d95..ddba224 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -1180,6 +1180,8 @@ DATA(cpu_features)
        .long   0
        .text
 
+END(syscall)
+
 /* Discover what kind of cpu we have; return the family number
    (3, 4, 5, 6, for 386, 486, 586, 686 respectively).  */
 ENTRY(discover_x86_cpu_type)
diff --git a/i386/i386/model_dep.h b/i386/i386/model_dep.h
index ab2738f..54aa1ec 100644
--- a/i386/i386/model_dep.h
+++ b/i386/i386/model_dep.h
@@ -50,11 +50,4 @@ extern void halt_cpu (void) __attribute__ ((noreturn));
  */
 extern void halt_all_cpus (boolean_t reboot) __attribute__ ((noreturn));
 
-/*
- *     More-specific code provides these;
- *     they indicate the total extent of physical memory
- *     that we know about and might ever have to manage.
- */
-extern vm_offset_t phys_first_addr, phys_last_addr;
-
 #endif /* _I386AT_MODEL_DEP_H_ */
diff --git a/i386/i386/phys.c b/i386/i386/phys.c
index 8681fba..a5c3a15 100644
--- a/i386/i386/phys.c
+++ b/i386/i386/phys.c
@@ -47,12 +47,12 @@
  *     pmap_zero_page zeros the specified (machine independent) page.
  */
 void
-pmap_zero_page(vm_offset_t p)
+pmap_zero_page(phys_addr_t p)
 {
        assert(p != vm_page_fictitious_addr);
        vm_offset_t v;
        pmap_mapwindow_t *map;
-       boolean_t mapped = p >= phys_last_addr;
+       boolean_t mapped = p >= VM_PAGE_DIRECTMAP_LIMIT;
 
        if (mapped)
        {
@@ -73,14 +73,14 @@ pmap_zero_page(vm_offset_t p)
  */
 void
 pmap_copy_page(
-       vm_offset_t src, 
-       vm_offset_t dst)
+       phys_addr_t src,
+       phys_addr_t dst)
 {
        vm_offset_t src_addr_v, dst_addr_v;
        pmap_mapwindow_t *src_map = NULL;
        pmap_mapwindow_t *dst_map;
-       boolean_t src_mapped = src >= phys_last_addr;
-       boolean_t dst_mapped = dst >= phys_last_addr;
+       boolean_t src_mapped = src >= VM_PAGE_DIRECTMAP_LIMIT;
+       boolean_t dst_mapped = dst >= VM_PAGE_DIRECTMAP_LIMIT;
        assert(src != vm_page_fictitious_addr);
        assert(dst != vm_page_fictitious_addr);
 
@@ -116,12 +116,12 @@ pmap_copy_page(
 void
 copy_to_phys(
        vm_offset_t     src_addr_v, 
-       vm_offset_t     dst_addr_p,
+       phys_addr_t     dst_addr_p,
        int             count)
 {
        vm_offset_t dst_addr_v;
        pmap_mapwindow_t *dst_map;
-       boolean_t mapped = dst_addr_p >= phys_last_addr;
+       boolean_t mapped = dst_addr_p >= VM_PAGE_DIRECTMAP_LIMIT;
        assert(dst_addr_p != vm_page_fictitious_addr);
        assert(pa_to_pte(dst_addr_p + count-1) == pa_to_pte(dst_addr_p));
 
@@ -147,13 +147,13 @@ copy_to_phys(
  */
 void
 copy_from_phys(
-       vm_offset_t     src_addr_p, 
+       phys_addr_t     src_addr_p, 
        vm_offset_t     dst_addr_v,
        int             count)
 {
        vm_offset_t src_addr_v;
        pmap_mapwindow_t *src_map;
-       boolean_t mapped = src_addr_p >= phys_last_addr;
+       boolean_t mapped = src_addr_p >= VM_PAGE_DIRECTMAP_LIMIT;
        assert(src_addr_p != vm_page_fictitious_addr);
        assert(pa_to_pte(src_addr_p + count-1) == pa_to_pte(src_addr_p));
 
@@ -176,7 +176,7 @@ copy_from_phys(
  *
  *     Convert a kernel virtual address to a physical address
  */
-vm_offset_t
+phys_addr_t
 kvtophys(vm_offset_t addr)
 {
        pt_entry_t *pte;
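
The pattern behind these phys.c changes: a physical address at or above VM_PAGE_DIRECTMAP_LIMIT can no longer be reached through phystokv() and must be borrowed into a temporary map window first. A minimal sketch of that pattern, assuming the pmap_get_mapwindow()/pmap_put_mapwindow() helpers used in this file; the PTE bits are taken from the pmap bootstrap code later in this patch, since the exact bits phys.c passes are not shown in this diff:

    /* Sketch only: zero one physical page, whether high or low. */
    static void
    zero_phys_page(phys_addr_t pa)
    {
        boolean_t mapped = pa >= VM_PAGE_DIRECTMAP_LIMIT;
        pmap_mapwindow_t *map = NULL;
        vm_offset_t va;

        if (mapped) {
            /* High page: not direct-mapped, borrow a kernel map window. */
            map = pmap_get_mapwindow(pa_to_pte(pa)
                                     | INTEL_PTE_VALID | INTEL_PTE_WRITE);
            va = map->vaddr;
        } else {
            /* Low page: part of the direct physical mapping. */
            va = phystokv(pa);
        }

        memset((void *) va, 0, PAGE_SIZE);

        if (mapped)
            pmap_put_mapwindow(map);
    }
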
diff --git a/i386/i386/strings.c b/i386/i386/strings.c
new file mode 100644
index 0000000..84a3bc1
--- /dev/null
+++ b/i386/i386/strings.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#define ARCH_STRING_MEMCPY
+#define ARCH_STRING_MEMMOVE
+#define ARCH_STRING_MEMSET
+#define ARCH_STRING_MEMCMP
+
+#ifdef ARCH_STRING_MEMCPY
+void *
+memcpy(void *dest, const void *src, size_t n)
+{
+    void *orig_dest;
+
+    orig_dest = dest;
+    asm volatile("rep movsb"
+                 : "+D" (dest), "+S" (src), "+c" (n)
+                 : : "memory");
+    return orig_dest;
+}
+#endif /* ARCH_STRING_MEMCPY */
+
+#ifdef ARCH_STRING_MEMMOVE
+void *
+memmove(void *dest, const void *src, size_t n)
+{
+    void *orig_dest;
+
+    orig_dest = dest;
+
+    if (dest <= src)
+        asm volatile("rep movsb"
+                     : "+D" (dest), "+S" (src), "+c" (n)
+                     : : "memory");
+    else {
+        dest += n - 1;
+        src += n - 1;
+        asm volatile("std; rep movsb; cld"
+                     : "+D" (dest), "+S" (src), "+c" (n)
+                     : : "memory");
+    }
+
+    return orig_dest;
+}
+#endif /* ARCH_STRING_MEMMOVE */
+
+#ifdef ARCH_STRING_MEMSET
+void *
+memset(void *s, int c, size_t n)
+{
+    void *orig_s;
+
+    orig_s = s;
+    asm volatile("rep stosb"
+                 : "+D" (s), "+c" (n)
+                 : "a" (c)
+                 : "memory");
+    return orig_s;
+}
+#endif /* ARCH_STRING_MEMSET */
+
+#ifdef ARCH_STRING_MEMCMP
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+    unsigned char c1, c2;
+
+    if (n == 0)
+        return 0;
+
+    asm volatile("repe cmpsb"
+                 : "+D" (s1), "+S" (s2), "+c" (n)
+                 : : "memory");
+    c1 = *(((const unsigned char *)s1) - 1);
+    c2 = *(((const unsigned char *)s2) - 1);
+    return (int)c1 - (int)c2;
+}
+#endif /* ARCH_STRING_MEMCMP */
+
+#ifdef ARCH_STRING_STRLEN
+size_t
+strlen(const char *s)
+{
+    size_t n;
+
+    n = (size_t)-1;
+    asm volatile("repne scasb"
+                 : "+D" (s), "+c" (n)
+                 : "a" (0)
+                 : "memory");
+    return ~n - 1;
+}
+#endif /* ARCH_STRING_STRLEN */
+
+#ifdef ARCH_STRING_STRCPY
+char *
+strcpy(char *dest, const char *src)
+{
+    char *orig_dest;
+
+    orig_dest = dest;
+    asm volatile("1:\n"
+                 "lodsb\n"
+                 "stosb\n"
+                 "testb %%al, %%al\n"
+                 "jnz 1b\n"
+                 : "+D" (dest), "+S" (src)
+                 : : "al", "memory");
+    return orig_dest;
+}
+#endif /* ARCH_STRING_STRCPY */
+
+#ifdef ARCH_STRING_STRCMP
+int
+strcmp(const char *s1, const char *s2)
+{
+    unsigned char c1, c2;
+
+    asm volatile("1:\n"
+                 "lodsb\n"
+                 "scasb\n"
+                 "jne 1f\n"
+                 "testb %%al, %%al\n"
+                 "jnz 1b\n"
+                 "1:\n"
+                 : "+D" (s1), "+S" (s2)
+                 : : "al", "memory");
+    c1 = *(((const unsigned char *)s1) - 1);
+    c2 = *(((const unsigned char *)s2) - 1);
+    return (int)c1 - (int)c2;
+}
+#endif /* ARCH_STRING_STRCMP */
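
These routines are drop-in replacements for the generic kern/strings.c versions, selected by the ARCH_STRING_* macros. A hypothetical host-side check, not part of the patch, exercising the overlap handling of memmove (which must copy backwards once dest > src) and the memset/memcmp conventions; build i386/i386/strings.c into the test to run it against these implementations rather than the host libc:

    #include <assert.h>
    #include <string.h>

    int
    main(void)
    {
        char buf[16] = "abcdefgh";

        /* Overlapping move with dest > src: requires a backward copy. */
        memmove(buf + 2, buf, 8);
        assert(memcmp(buf + 2, "abcdefgh", 8) == 0);

        /* memset must handle both a zero-length and a full-buffer request. */
        memset(buf, 0, 0);
        memset(buf, 0, sizeof(buf));
        assert(buf[0] == 0 && buf[sizeof(buf) - 1] == 0);
        return 0;
    }
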
diff --git a/i386/i386/trap.c b/i386/i386/trap.c
index 6470504..d4bdc7f 100644
--- a/i386/i386/trap.c
+++ b/i386/i386/trap.c
@@ -351,16 +351,6 @@ int user_trap(struct i386_saved_state *regs)
        int     type;
        thread_t thread = current_thread();
 
-       if ((vm_offset_t)thread < phys_last_addr) {
-               printf("user_trap: bad thread pointer 0x%p\n", thread);
-               printf("trap type %ld, code 0x%lx, va 0x%lx, eip 0x%lx\n",
-                      regs->trapno, regs->err, regs->cr2, regs->eip);
-               asm volatile ("1: hlt; jmp 1b");
-       }
-#if 0
-printf("user trap %d error %d sub %08x\n", type, code, subcode);
-#endif
-
        type = regs->trapno;
        code = 0;
        subcode = 0;
diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c
index a7a440e..a104020 100644
--- a/i386/i386at/biosmem.c
+++ b/i386/i386at/biosmem.c
@@ -18,7 +18,6 @@
 #include <string.h>
 #include <i386/model_dep.h>
 #include <i386at/biosmem.h>
-#include <i386at/elf.h>
 #include <kern/assert.h>
 #include <kern/debug.h>
 #include <kern/macros.h>
@@ -29,6 +28,8 @@
 #include <sys/types.h>
 #include <vm/vm_page.h>
 
+#define DEBUG 0
+
 #define __boot
 #define __bootdata
 #define __init
@@ -41,7 +42,26 @@
 #define BOOT_CGACHARS   (80 * 25)
 #define BOOT_CGACOLOR   0x7
 
-extern char _start, _end;
+#define BIOSMEM_MAX_BOOT_DATA 64
+
+/*
+ * Boot data descriptor.
+ *
+ * The start and end addresses must not be page-aligned, since there
+ * could be more than one range inside a single page.
+ */
+struct biosmem_boot_data {
+    phys_addr_t start;
+    phys_addr_t end;
+    boolean_t temporary;
+};
+
+/*
+ * Sorted array of boot data descriptors.
+ */
+static struct biosmem_boot_data biosmem_boot_data_array[BIOSMEM_MAX_BOOT_DATA]
+    __bootdata;
+static unsigned int biosmem_nr_boot_data __bootdata;
 
 /*
  * Maximum number of entries in the BIOS memory map.
@@ -71,19 +91,6 @@ struct biosmem_map_entry {
 };
 
 /*
- * Contiguous block of physical memory.
- *
- * Tha "available" range records what has been passed to the VM system as
- * available inside the segment.
- */
-struct biosmem_segment {
-    phys_addr_t start;
-    phys_addr_t end;
-    phys_addr_t avail_start;
-    phys_addr_t avail_end;
-};
-
-/*
  * Memory map built from the information passed by the boot loader.
  *
  * If the boot loader didn't pass a valid memory map, a simple map is built
@@ -94,6 +101,14 @@ static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2]
 static unsigned int biosmem_map_size __bootdata;
 
 /*
+ * Contiguous block of physical memory.
+ */
+struct biosmem_segment {
+    phys_addr_t start;
+    phys_addr_t end;
+};
+
+/*
  * Physical segment boundaries.
  */
 static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata;
@@ -103,11 +118,24 @@ static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata;
  *
  * This heap is located above BIOS memory.
  */
-static uint32_t biosmem_heap_start __bootdata;
-static uint32_t biosmem_heap_cur __bootdata;
-static uint32_t biosmem_heap_end __bootdata;
+static phys_addr_t biosmem_heap_start __bootdata;
+static phys_addr_t biosmem_heap_bottom __bootdata;
+static phys_addr_t biosmem_heap_top __bootdata;
+static phys_addr_t biosmem_heap_end __bootdata;
+
+/*
+ * Boot allocation policy.
+ *
+ * Top-down allocations are normally preferred to avoid unnecessarily
+ * filling the DMA segment.
+ */
+static boolean_t biosmem_heap_topdown __bootdata;
 
-static char biosmem_panic_toobig_msg[] __bootdata
+static char biosmem_panic_inval_boot_data[] __bootdata
+    = "biosmem: invalid boot data";
+static char biosmem_panic_too_many_boot_data[] __bootdata
+    = "biosmem: too many boot data ranges";
+static char biosmem_panic_too_big_msg[] __bootdata
     = "biosmem: too many memory map entries";
 #ifndef MACH_HYP
 static char biosmem_panic_setup_msg[] __bootdata
@@ -120,6 +148,103 @@ static char biosmem_panic_inval_msg[] __bootdata
 static char biosmem_panic_nomem_msg[] __bootdata
     = "biosmem: unable to allocate memory";
 
+void __boot
+biosmem_register_boot_data(phys_addr_t start, phys_addr_t end,
+                           boolean_t temporary)
+{
+    unsigned int i;
+
+    if (start >= end) {
+        boot_panic(biosmem_panic_inval_boot_data);
+    }
+
+    if (biosmem_nr_boot_data == ARRAY_SIZE(biosmem_boot_data_array)) {
+        boot_panic(biosmem_panic_too_many_boot_data);
+    }
+
+    for (i = 0; i < biosmem_nr_boot_data; i++) {
+        /* Check if the new range overlaps */
+        if ((end > biosmem_boot_data_array[i].start)
+             && (start < biosmem_boot_data_array[i].end)) {
+
+            /*
+             * If it does, check whether it's part of another range.
+             * For example, this applies to debugging symbols directly
+             * taken from the kernel image.
+             */
+            if ((start >= biosmem_boot_data_array[i].start)
+                && (end <= biosmem_boot_data_array[i].end)) {
+
+                /*
+                 * If it's completely included, make sure that a permanent
+                 * range remains permanent.
+                 *
+                 * XXX This means that if one big range is first registered
+                 * as temporary, and a smaller range inside of it is
+                 * registered as permanent, the bigger range becomes
+                 * permanent. It's not easy nor useful in practice to do
+                 * better than that.
+                 */
+                if (biosmem_boot_data_array[i].temporary != temporary) {
+                    biosmem_boot_data_array[i].temporary = FALSE;
+                }
+
+                return;
+            }
+
+            boot_panic(biosmem_panic_inval_boot_data);
+        }
+
+        if (end <= biosmem_boot_data_array[i].start) {
+            break;
+        }
+    }
+
+    boot_memmove(&biosmem_boot_data_array[i + 1],
+                 &biosmem_boot_data_array[i],
+                 (biosmem_nr_boot_data - i) * sizeof(*biosmem_boot_data_array));
+
+    biosmem_boot_data_array[i].start = start;
+    biosmem_boot_data_array[i].end = end;
+    biosmem_boot_data_array[i].temporary = temporary;
+    biosmem_nr_boot_data++;
+}
+
+static void __init
+biosmem_unregister_boot_data(phys_addr_t start, phys_addr_t end)
+{
+    unsigned int i;
+
+    if (start >= end) {
+        panic(biosmem_panic_inval_boot_data);
+    }
+
+    assert(biosmem_nr_boot_data != 0);
+
+    for (i = 0; biosmem_nr_boot_data; i++) {
+        if ((start == biosmem_boot_data_array[i].start)
+            && (end == biosmem_boot_data_array[i].end)) {
+            break;
+        }
+    }
+
+    if (i == biosmem_nr_boot_data) {
+        return;
+    }
+
+#if DEBUG
+    printf("biosmem: unregister boot data: %llx:%llx\n",
+           (unsigned long long)biosmem_boot_data_array[i].start,
+           (unsigned long long)biosmem_boot_data_array[i].end);
+#endif /* DEBUG */
+
+    biosmem_nr_boot_data--;
+
+    boot_memmove(&biosmem_boot_data_array[i],
+                 &biosmem_boot_data_array[i + 1],
+                 (biosmem_nr_boot_data - i) * sizeof(*biosmem_boot_data_array));
+}
+
 #ifndef MACH_HYP
 
 static void __boot
@@ -302,7 +427,7 @@ biosmem_map_adjust(void)
                  */
 
                 if (biosmem_map_size >= ARRAY_SIZE(biosmem_map))
-                    boot_panic(biosmem_panic_toobig_msg);
+                    boot_panic(biosmem_panic_too_big_msg);
 
                 biosmem_map[biosmem_map_size] = tmp;
                 biosmem_map_size++;
@@ -321,6 +446,16 @@ biosmem_map_adjust(void)
     biosmem_map_sort();
 }
 
+/*
+ * Find addresses of physical memory within a given range.
+ *
+ * This function considers the memory map with the [*phys_start, *phys_end]
+ * range on entry, and returns the lowest address of physical memory
+ * in *phys_start, and the highest address of unusable memory immediately
+ * following physical memory in *phys_end.
+ *
+ * These addresses are normally used to establish the range of a segment.
+ */
 static int __boot
 biosmem_map_find_avail(phys_addr_t *phys_start, phys_addr_t *phys_end)
 {
@@ -382,161 +517,132 @@ biosmem_segment_size(unsigned int seg_index)
     return biosmem_segments[seg_index].end - biosmem_segments[seg_index].start;
 }
 
-#ifndef MACH_HYP
-
-static void __boot
-biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi)
+static int __boot
+biosmem_find_avail_clip(phys_addr_t *avail_start, phys_addr_t *avail_end,
+                        phys_addr_t data_start, phys_addr_t data_end)
 {
-    struct multiboot_raw_module *mod;
-    uint32_t i, va;
+    phys_addr_t orig_end;
 
-    if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) {
-        va = phystokv(mbi->cmdline);
-        mbi->unused0 = boot_strlen((char *)va) + 1;
-    }
+    assert(data_start < data_end);
 
-    if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
-        unsigned long addr;
+    orig_end = data_end;
+    data_start = vm_page_trunc(data_start);
+    data_end = vm_page_round(data_end);
 
-        addr = phystokv(mbi->mods_addr);
+    if (data_end < orig_end) {
+        boot_panic(biosmem_panic_inval_boot_data);
+    }
 
-        for (i = 0; i < mbi->mods_count; i++) {
-            mod = (struct multiboot_raw_module *)addr + i;
-            va = phystokv(mod->string);
-            mod->reserved = boot_strlen((char *)va) + 1;
-        }
+    if ((data_end <= *avail_start) || (data_start >= *avail_end)) {
+        return 0;
     }
-}
 
-static void __boot
-biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end,
-                              uint32_t data_start, uint32_t data_end)
-{
-    if ((min <= data_start) && (data_start < *start)) {
-        *start = data_start;
-        *end = data_end;
+    if (data_start > *avail_start) {
+        *avail_end = data_start;
+    } else {
+        if (data_end >= *avail_end) {
+            return -1;
+        }
+
+        *avail_start = data_end;
     }
+
+    return 0;
 }
 
 /*
- * Find the first boot data in the given range, and return their containing
- * area (start address is returned directly, end address is returned in end).
- * The following are considered boot data :
- *  - the kernel
- *  - the kernel command line
- *  - the module table
- *  - the modules
- *  - the modules command lines
- *  - the ELF section header table
- *  - the ELF .shstrtab, .symtab and .strtab sections
+ * Find available memory in the given range.
+ *
+ * The search starts at the given start address, up to the given end address.
+ * If a range is found, it is stored through the avail_startp and avail_endp
+ * pointers.
  *
- * If no boot data was found, 0 is returned, and the end address isn't set.
+ * The range boundaries are page-aligned on return.
  */
-static uint32_t __boot
-biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min,
-                       uint32_t max, uint32_t *endp)
+static int __boot
+biosmem_find_avail(phys_addr_t start, phys_addr_t end,
+                   phys_addr_t *avail_start, phys_addr_t *avail_end)
 {
-    struct multiboot_raw_module *mod;
-    struct elf_shdr *shdr;
-    uint32_t i, start, end = end;
-    unsigned long tmp;
-
-    start = max;
-
-    biosmem_find_boot_data_update(min, &start, &end, _kvtophys(&_start),
-                                  _kvtophys(&_end));
-
-    if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0))
-        biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline,
-                                      mbi->cmdline + mbi->unused0);
-
-    if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
-        i = mbi->mods_count * sizeof(struct multiboot_raw_module);
-        biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr,
-                                      mbi->mods_addr + i);
-        tmp = phystokv(mbi->mods_addr);
-
-        for (i = 0; i < mbi->mods_count; i++) {
-            mod = (struct multiboot_raw_module *)tmp + i;
-            biosmem_find_boot_data_update(min, &start, &end, mod->mod_start,
-                                          mod->mod_end);
-
-            if (mod->string != 0)
-                biosmem_find_boot_data_update(min, &start, &end, mod->string,
-                                              mod->string + mod->reserved);
-        }
-    }
+    phys_addr_t orig_start;
+    unsigned int i;
+    int error;
 
-    if (mbi->flags & MULTIBOOT_LOADER_SHDR) {
-        tmp = mbi->shdr_num * mbi->shdr_size;
-        biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr,
-                                      mbi->shdr_addr + tmp);
-        tmp = phystokv(mbi->shdr_addr);
+    assert(start <= end);
 
-        for (i = 0; i < mbi->shdr_num; i++) {
-            shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size));
+    orig_start = start;
+    start = vm_page_round(start);
+    end = vm_page_trunc(end);
 
-            if ((shdr->type != ELF_SHT_SYMTAB)
-                && (shdr->type != ELF_SHT_STRTAB))
-                continue;
+    if ((start < orig_start) || (start >= end)) {
+        return -1;
+    }
+
+    *avail_start = start;
+    *avail_end = end;
+
+    for (i = 0; i < biosmem_nr_boot_data; i++) {
+        error = biosmem_find_avail_clip(avail_start, avail_end,
+                                        biosmem_boot_data_array[i].start,
+                                        biosmem_boot_data_array[i].end);
 
-            biosmem_find_boot_data_update(min, &start, &end, shdr->addr,
-                                          shdr->addr + shdr->size);
+        if (error) {
+            return -1;
         }
     }
 
-    if (start == max)
-        return 0;
-
-    *endp = end;
-    return start;
+    return 0;
 }
 
+#ifndef MACH_HYP
+
 static void __boot
-biosmem_setup_allocator(struct multiboot_raw_info *mbi)
+biosmem_setup_allocator(const struct multiboot_raw_info *mbi)
 {
-    uint32_t heap_start, heap_end, max_heap_start, max_heap_end;
-    uint32_t mem_end, next;
+    phys_addr_t heap_start, heap_end, max_heap_start, max_heap_end;
+    phys_addr_t start, end;
+    int error;
 
     /*
      * Find some memory for the heap. Look for the largest unused area in
      * upper memory, carefully avoiding all boot data.
      */
-    mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
+    end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
 
 #ifndef __LP64__
-    if (mem_end > VM_PAGE_DIRECTMAP_LIMIT)
-        mem_end = VM_PAGE_DIRECTMAP_LIMIT;
+    if (end > VM_PAGE_DIRECTMAP_LIMIT)
+        end = VM_PAGE_DIRECTMAP_LIMIT;
 #endif /* __LP64__ */
 
     max_heap_start = 0;
     max_heap_end = 0;
-    next = BIOSMEM_END;
+    start = BIOSMEM_END;
 
-    do {
-        heap_start = next;
-        heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next);
+    for (;;) {
+        error = biosmem_find_avail(start, end, &heap_start, &heap_end);
 
-        if (heap_end == 0) {
-            heap_end = mem_end;
-            next = 0;
+        if (error) {
+            break;
         }
 
         if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) {
             max_heap_start = heap_start;
             max_heap_end = heap_end;
         }
-    } while (next != 0);
 
-    max_heap_start = vm_page_round(max_heap_start);
-    max_heap_end = vm_page_trunc(max_heap_end);
+        start = heap_end;
+    }
 
     if (max_heap_start >= max_heap_end)
         boot_panic(biosmem_panic_setup_msg);
 
     biosmem_heap_start = max_heap_start;
     biosmem_heap_end = max_heap_end;
-    biosmem_heap_cur = biosmem_heap_end;
+    biosmem_heap_bottom = biosmem_heap_start;
+    biosmem_heap_top = biosmem_heap_end;
+    biosmem_heap_topdown = TRUE;
+
+    /* Prevent biosmem_free_usable() from releasing the heap */
+    biosmem_register_boot_data(biosmem_heap_start, biosmem_heap_end, FALSE);
 }
 
 #endif /* MACH_HYP */
@@ -544,7 +650,7 @@ biosmem_setup_allocator(struct multiboot_raw_info *mbi)
 static void __boot
 biosmem_bootstrap_common(void)
 {
-    phys_addr_t phys_start, phys_end, last_addr;
+    phys_addr_t phys_start, phys_end;
     int error;
 
     biosmem_map_adjust();
@@ -557,7 +663,6 @@ biosmem_bootstrap_common(void)
         boot_panic(biosmem_panic_noseg_msg);
 
     biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end);
-    last_addr = phys_end;
 
     phys_start = VM_PAGE_DMA_LIMIT;
 #ifdef VM_PAGE_DMA32_LIMIT
@@ -565,10 +670,9 @@ biosmem_bootstrap_common(void)
     error = biosmem_map_find_avail(&phys_start, &phys_end);
 
     if (error)
-        goto out;
+        return;
 
     biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end);
-    last_addr = phys_end;
 
     phys_start = VM_PAGE_DMA32_LIMIT;
 #endif /* VM_PAGE_DMA32_LIMIT */
@@ -576,23 +680,18 @@ biosmem_bootstrap_common(void)
     error = biosmem_map_find_avail(&phys_start, &phys_end);
 
     if (error)
-        goto out;
+        return;
 
     biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end);
-    last_addr = phys_end;
 
     phys_start = VM_PAGE_DIRECTMAP_LIMIT;
     phys_end = VM_PAGE_HIGHMEM_LIMIT;
     error = biosmem_map_find_avail(&phys_start, &phys_end);
 
     if (error)
-        goto out;
+        return;
 
     biosmem_set_segment(VM_PAGE_SEG_HIGHMEM, phys_start, phys_end);
-
-out:
-    /* XXX phys_last_addr must be part of the direct physical mapping */
-    phys_last_addr = last_addr;
 }
 
 #ifdef MACH_HYP
@@ -616,26 +715,34 @@ biosmem_xen_bootstrap(void)
     biosmem_heap_end = boot_info.nr_pages << PAGE_SHIFT;
 
 #ifndef __LP64__
-    /* TODO Check that this actually makes sense */
     if (biosmem_heap_end > VM_PAGE_DIRECTMAP_LIMIT)
         biosmem_heap_end = VM_PAGE_DIRECTMAP_LIMIT;
 #endif /* __LP64__ */
 
+    biosmem_heap_bottom = biosmem_heap_start;
+    biosmem_heap_top = biosmem_heap_end;
+
     /*
-     * XXX Allocation on Xen must be bottom-up :
+     * XXX Allocations on Xen are initially bottom-up:
      * At the "start of day", only 512k are available after the boot
      * data. The pmap module then creates a 4g mapping so all physical
      * memory is available, but it uses this allocator to do so.
      * Therefore, it must return pages from this small 512k regions
      * first.
      */
-    biosmem_heap_cur = biosmem_heap_start;
+    biosmem_heap_topdown = FALSE;
+
+    /*
+     * Prevent biosmem_free_usable() from releasing the Xen boot information
+     * and the heap.
+     */
+    biosmem_register_boot_data(0, biosmem_heap_end, FALSE);
 }
 
 #else /* MACH_HYP */
 
 void __boot
-biosmem_bootstrap(struct multiboot_raw_info *mbi)
+biosmem_bootstrap(const struct multiboot_raw_info *mbi)
 {
     if (mbi->flags & MULTIBOOT_LOADER_MMAP)
         biosmem_map_build(mbi);
@@ -643,12 +750,6 @@ biosmem_bootstrap(struct multiboot_raw_info *mbi)
         biosmem_map_build_simple(mbi);
 
     biosmem_bootstrap_common();
-
-    /*
-     * The kernel and modules command lines will be memory mapped later
-     * during initialization. Their respective sizes must be saved.
-     */
-    biosmem_save_cmdline_sizes(mbi);
     biosmem_setup_allocator(mbi);
 }
 
@@ -659,34 +760,37 @@ biosmem_bootalloc(unsigned int nr_pages)
 {
     unsigned long addr, size;
 
-    assert(!vm_page_ready());
-
     size = vm_page_ptoa(nr_pages);
 
     if (size == 0)
         boot_panic(biosmem_panic_inval_msg);
 
-#ifdef MACH_HYP
-    addr = biosmem_heap_cur;
-#else /* MACH_HYP */
-    /* Top-down allocation to avoid unnecessarily filling DMA segments */
-    addr = biosmem_heap_cur - size;
-#endif /* MACH_HYP */
+    if (biosmem_heap_topdown) {
+        addr = biosmem_heap_top - size;
 
-    if ((addr < biosmem_heap_start) || (addr > biosmem_heap_cur))
-        boot_panic(biosmem_panic_nomem_msg);
+        if ((addr < biosmem_heap_start) || (addr > biosmem_heap_top)) {
+            boot_panic(biosmem_panic_nomem_msg);
+        }
 
-#ifdef MACH_HYP
-    biosmem_heap_cur += size;
-#else /* MACH_HYP */
-    biosmem_heap_cur = addr;
-#endif /* MACH_HYP */
+        biosmem_heap_top = addr;
+    } else {
+        unsigned long end;
+
+        addr = biosmem_heap_bottom;
+        end = addr + size;
+
+        if ((end > biosmem_heap_end) || (end < biosmem_heap_bottom)) {
+            boot_panic(biosmem_panic_nomem_msg);
+        }
+
+        biosmem_heap_bottom = end;
+    }
 
     return addr;
 }
 
 phys_addr_t __boot
-biosmem_directmap_size(void)
+biosmem_directmap_end(void)
 {
     if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0)
         return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP);
@@ -696,6 +800,8 @@ biosmem_directmap_size(void)
         return biosmem_segment_end(VM_PAGE_SEG_DMA);
 }
 
+#if DEBUG
+
 static const char * __init
 biosmem_type_desc(unsigned int type)
 {
@@ -729,16 +835,23 @@ biosmem_map_show(void)
                entry->base_addr + entry->length,
                biosmem_type_desc(entry->type));
 
-    printf("biosmem: heap: %x-%x\n", biosmem_heap_start, biosmem_heap_end);
+    printf("biosmem: heap: %llx:%llx\n",
+           (unsigned long long)biosmem_heap_start,
+           (unsigned long long)biosmem_heap_end);
 }
 
+#else /* DEBUG */
+#define biosmem_map_show()
+#endif /* DEBUG */
+
 static void __init
-biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end,
-                     phys_addr_t phys_start, phys_addr_t phys_end,
-                     phys_addr_t avail_start, phys_addr_t avail_end)
+biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end)
 {
+    phys_addr_t phys_start, phys_end, avail_start, avail_end;
     unsigned int seg_index;
 
+    phys_start = seg->start;
+    phys_end = seg->end;
     seg_index = seg - biosmem_segments;
 
     if (phys_end > max_phys_end) {
@@ -753,15 +866,28 @@ biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end,
         phys_end = max_phys_end;
     }
 
-    if ((avail_start < phys_start) || (avail_start >= phys_end))
-        avail_start = phys_start;
+    vm_page_load(seg_index, phys_start, phys_end);
 
-    if ((avail_end <= phys_start) || (avail_end > phys_end))
-        avail_end = phys_end;
+    /*
+     * Clip the remaining available heap to fit it into the loaded
+     * segment if possible.
+     */
 
-    seg->avail_start = avail_start;
-    seg->avail_end = avail_end;
-    vm_page_load(seg_index, phys_start, phys_end, avail_start, avail_end);
+    if ((biosmem_heap_top > phys_start) && (biosmem_heap_bottom < phys_end)) {
+        if (biosmem_heap_bottom >= phys_start) {
+            avail_start = biosmem_heap_bottom;
+        } else {
+            avail_start = phys_start;
+        }
+
+        if (biosmem_heap_top <= phys_end) {
+            avail_end = biosmem_heap_top;
+        } else {
+            avail_end = phys_end;
+        }
+
+        vm_page_load_heap(seg_index, avail_start, avail_end);
+    }
 }
 
 void __init
@@ -777,8 +903,25 @@ biosmem_setup(void)
             break;
 
         seg = &biosmem_segments[i];
-        biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT, seg->start, seg->end,
-                             biosmem_heap_start, biosmem_heap_cur);
+        biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT);
+    }
+}
+
+static void __init
+biosmem_unregister_temporary_boot_data(void)
+{
+    struct biosmem_boot_data *data;
+    unsigned int i;
+
+    for (i = 0; i < biosmem_nr_boot_data; i++) {
+        data = &biosmem_boot_data_array[i];
+
+        if (!data->temporary) {
+            continue;
+        }
+
+        biosmem_unregister_boot_data(data->start, data->end);
+        i = (unsigned int)-1;
     }
 }
 
@@ -787,9 +930,11 @@ biosmem_free_usable_range(phys_addr_t start, phys_addr_t end)
 {
     struct vm_page *page;
 
-    printf("biosmem: release to vm_page: %llx-%llx (%lluk)\n",
+#if DEBUG
+    printf("biosmem: release to vm_page: %llx:%llx (%lluk)\n",
            (unsigned long long)start, (unsigned long long)end,
            (unsigned long long)((end - start) >> 10));
+#endif
 
     while (start < end) {
         page = vm_page_lookup_pa(start);
@@ -800,85 +945,20 @@ biosmem_free_usable_range(phys_addr_t start, phys_addr_t end)
 }
 
 static void __init
-biosmem_free_usable_update_start(phys_addr_t *start, phys_addr_t res_start,
-                                 phys_addr_t res_end)
-{
-    if ((*start >= res_start) && (*start < res_end))
-        *start = res_end;
-}
-
-static phys_addr_t __init
-biosmem_free_usable_start(phys_addr_t start)
-{
-    const struct biosmem_segment *seg;
-    unsigned int i;
-
-    biosmem_free_usable_update_start(&start, _kvtophys(&_start),
-                                     _kvtophys(&_end));
-    biosmem_free_usable_update_start(&start, biosmem_heap_start,
-                                     biosmem_heap_end);
-
-    for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
-        seg = &biosmem_segments[i];
-        biosmem_free_usable_update_start(&start, seg->avail_start,
-                                         seg->avail_end);
-    }
-
-    return start;
-}
-
-static int __init
-biosmem_free_usable_reserved(phys_addr_t addr)
-{
-    const struct biosmem_segment *seg;
-    unsigned int i;
-
-    if ((addr >= _kvtophys(&_start))
-        && (addr < _kvtophys(&_end)))
-        return 1;
-
-    if ((addr >= biosmem_heap_start) && (addr < biosmem_heap_end))
-        return 1;
-
-    for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
-        seg = &biosmem_segments[i];
-
-        if ((addr >= seg->avail_start) && (addr < seg->avail_end))
-            return 1;
-    }
-
-    return 0;
-}
-
-static phys_addr_t __init
-biosmem_free_usable_end(phys_addr_t start, phys_addr_t entry_end)
-{
-    while (start < entry_end) {
-        if (biosmem_free_usable_reserved(start))
-            break;
-
-        start += PAGE_SIZE;
-    }
-
-    return start;
-}
-
-static void __init
 biosmem_free_usable_entry(phys_addr_t start, phys_addr_t end)
 {
-    phys_addr_t entry_end;
-
-    entry_end = end;
+    phys_addr_t avail_start, avail_end;
+    int error;
 
     for (;;) {
-        start = biosmem_free_usable_start(start);
+        error = biosmem_find_avail(start, end, &avail_start, &avail_end);
 
-        if (start >= entry_end)
-            return;
+        if (error) {
+            break;
+        }
 
-        end = biosmem_free_usable_end(start, entry_end);
-        biosmem_free_usable_range(start, end);
-        start = end;
+        biosmem_free_usable_range(avail_start, avail_end);
+        start = avail_end;
     }
 }
 
@@ -889,6 +969,8 @@ biosmem_free_usable(void)
     uint64_t start, end;
     unsigned int i;
 
+    biosmem_unregister_temporary_boot_data();
+
     for (i = 0; i < biosmem_map_size; i++) {
         entry = &biosmem_map[i];
 
@@ -902,9 +984,17 @@ biosmem_free_usable(void)
 
         end = vm_page_trunc(entry->base_addr + entry->length);
 
+        if (end > VM_PAGE_HIGHMEM_LIMIT) {
+            end = VM_PAGE_HIGHMEM_LIMIT;
+        }
+
         if (start < BIOSMEM_BASE)
             start = BIOSMEM_BASE;
 
+        if (start >= end) {
+            continue;
+        }
+
         biosmem_free_usable_entry(start, end);
     }
 }
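
To make the new scanning primitive concrete: biosmem_find_avail() starts from the requested range and clips it against the sorted boot data array, stopping at the first registered range it meets, which is how biosmem_setup_allocator() and biosmem_free_usable_entry() above walk memory. A worked example with made-up addresses, illustration only, assuming nothing else is registered inside the scanned range (biosmem_find_avail() is static to this file):

    phys_addr_t avail_start, avail_end;

    /* Say a boot module was registered at [2 MiB, 2.5 MiB). */
    biosmem_register_boot_data(0x200000, 0x280000, TRUE);

    /* A scan of [1 MiB, 16 MiB) is cut short by the module... */
    if (!biosmem_find_avail(0x100000, 0x1000000, &avail_start, &avail_end)) {
        /* avail_start == 0x100000, avail_end == 0x200000 */
    }

    /* ...and a second scan starting past it yields the remainder. */
    if (!biosmem_find_avail(0x280000, 0x1000000, &avail_start, &avail_end)) {
        /* avail_start == 0x280000, avail_end == 0x1000000 */
    }
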
diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h
index 1db63f9..7824c16 100644
--- a/i386/i386at/biosmem.h
+++ b/i386/i386at/biosmem.h
@@ -40,36 +40,51 @@
 #define BIOSMEM_END         0x100000
 
 /*
- * Early initialization of the biosmem module.
+ * Report reserved addresses to the biosmem module.
  *
- * This function processes the given multiboot data for BIOS-provided
- * memory information, and sets up a bootstrap physical page allocator.
+ * Once all boot data have been registered, the user can set up the
+ * early page allocator.
  *
- * It is called before paging is enabled.
+ * If the range is marked temporary, it will be unregistered when
+ * biosmem_free_usable() is called, so that pages that used to store
+ * these boot data may be released to the VM system.
+ */
+void biosmem_register_boot_data(phys_addr_t start, phys_addr_t end,
+                                boolean_t temporary);
+
+/*
+ * Initialize the early page allocator.
+ *
+ * This function uses the memory map provided by the boot loader along
+ * with the registered boot data addresses to set up a heap of free pages
+ * of physical memory.
+ *
+ * Note that on Xen, this function registers all the Xen boot information
+ * as boot data itself.
  */
 #ifdef MACH_HYP
 void biosmem_xen_bootstrap(void);
 #else /* MACH_HYP */
-void biosmem_bootstrap(struct multiboot_raw_info *mbi);
+void biosmem_bootstrap(const struct multiboot_raw_info *mbi);
 #endif /* MACH_HYP */
 
 /*
  * Allocate contiguous physical pages during bootstrap.
  *
- * This function is called before paging is enabled. It should only be used
- * to allocate initial page table pages. Those pages are later loaded into
- * the VM system (as reserved pages) which means they can be freed like other
- * regular pages. Users should fix up the type of those pages once the VM
- * system is initialized.
+ * The pages returned are guaranteed to be part of the direct physical
+ * mapping when paging is enabled.
+ *
+ * This function should only be used to allocate initial page table pages.
+ * Those pages are later loaded into the VM system (as reserved pages)
+ * which means they can be freed like other regular pages. Users should
+ * fix up the type of those pages once the VM system is initialized.
  */
 unsigned long biosmem_bootalloc(unsigned int nr_pages);
 
 /*
- * Return the amount of physical memory that can be directly mapped.
- *
- * This includes the size of both the DMA/DMA32 and DIRECTMAP segments.
+ * Return the limit of physical memory that can be directly mapped.
  */
-phys_addr_t biosmem_directmap_size(void);
+phys_addr_t biosmem_directmap_end(void);
 
 /*
  * Set up physical memory based on the information obtained during bootstrap
@@ -80,8 +95,8 @@ void biosmem_setup(void);
 /*
  * Free all usable memory.
  *
- * This includes ranges that weren't part of the bootstrap allocator initial
- * heap, e.g. because they contained boot data.
+ * This function releases all pages that aren't used by boot data and have
+ * not already been loaded into the VM system.
  */
 void biosmem_free_usable(void);
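
Taken together, the header now implies a call order. A condensed sketch of how the i386 startup code in this patch drives the interface; the wrapper function name is made up, and the real calls are spread across i386at_init() and machine_init() in model_dep.c below, plus VM startup code not shown in this excerpt:

    static void
    early_mem_init(const struct multiboot_raw_info *mbi)   /* hypothetical */
    {
        /* 1. Declare every range the boot allocator must not hand out. */
        biosmem_register_boot_data(_kvtophys(&_start), _kvtophys(&_end), FALSE);
        /* ... command line, modules, ELF symbol/string sections ... */

        /* 2. Build the memory map, the segments and the boot heap. */
        biosmem_bootstrap(mbi);

        /* 3. Early page-table pages come straight from the boot heap. */
        /*    e.g. unsigned long pt = biosmem_bootalloc(1); */

        /* 4. Load the segments into the VM system once it exists. */
        biosmem_setup();

        /* 5. Release leftover usable memory, including pages behind
              boot data that were registered as temporary. */
        biosmem_free_usable();
    }
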
 
diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
index 567851e..9339cb9 100644
--- a/i386/i386at/boothdr.S
+++ b/i386/i386at/boothdr.S
@@ -17,9 +17,9 @@ _start:
        /* MultiBoot header - see multiboot.h.  */
 #define        MULTIBOOT_MAGIC         0x1BADB002
 #ifdef __ELF__
-#define MULTIBOOT_FLAGS                0x00000002
+#define MULTIBOOT_FLAGS                0x00000003
 #else  /* __ELF__ */
-#define MULTIBOOT_FLAGS                0x00010002
+#define MULTIBOOT_FLAGS                0x00010003
 #endif /* __ELF__ */
        P2ALIGN(2)
 boot_hdr:
diff --git a/i386/i386at/interrupt.S b/i386/i386at/interrupt.S
index e238ea4..cdb385c 100644
--- a/i386/i386at/interrupt.S
+++ b/i386/i386at/interrupt.S
@@ -49,3 +49,4 @@ ENTRY(interrupt)
        outb    %al,$(PIC_SLAVE_ICW)
 1:
        ret                             /* return */
+END(interrupt)
diff --git a/i386/i386at/mem.c b/i386/i386at/mem.c
index f239afa..eac2549 100644
--- a/i386/i386at/mem.c
+++ b/i386/i386at/mem.c
@@ -36,12 +36,24 @@ dev_t               dev;
 vm_offset_t    off;
 vm_prot_t      prot;
 {
+       struct vm_page *p;
+
        if (off == 0)
                return 0;
-       else if (off < 0xa0000)
-               return -1;
-       else if (off >= 0x100000 && off < phys_last_addr)
+
+       /*
+        * The legacy device mappings are included in the page tables and
+        * need their own test.
+        */
+       if (off >= 0xa0000 && off < 0x100000)
+               goto out;
+
+       p = vm_page_lookup_pa(off);
+
+       if (p != NULL) {
                return -1;
-       else
-               return i386_btop(off);
+       }
+
+out:
+       return i386_btop(off);
 }
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index 679d524..239f63f 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -66,6 +66,7 @@
 #include <i386/model_dep.h>
 #include <i386at/autoconf.h>
 #include <i386at/biosmem.h>
+#include <i386at/elf.h>
 #include <i386at/idt.h>
 #include <i386at/int_init.h>
 #include <i386at/kd.h>
@@ -105,11 +106,6 @@ static unsigned elf_shdr_shndx;
 
 #define RESERVED_BIOS 0x10000
 
-/* These indicate the total extent of physical memory addresses we're using.
-   They are page-aligned.  */
-vm_offset_t phys_first_addr = 0;
-vm_offset_t phys_last_addr;
-
 /* A copy of the multiboot info structure passed by the boot loader.  */
 #ifdef MACH_XEN
 struct start_info boot_info;
@@ -153,6 +149,14 @@ void machine_init(void)
        cninit();
 
        /*
+        * Make more free memory.
+        *
+        * This is particularly important for the Linux drivers which
+        * require available DMA memory.
+        */
+       biosmem_free_usable();
+
+       /*
         * Set up to use floating point.
         */
        init_fpu();
@@ -264,6 +268,67 @@ void db_reset_cpu(void)
        halt_all_cpus(1);
 }
 
+#ifndef        MACH_HYP
+
+static void
+register_boot_data(const struct multiboot_raw_info *mbi)
+{
+       struct multiboot_raw_module *mod;
+       struct elf_shdr *shdr;
+       unsigned long tmp;
+       unsigned int i;
+
+       extern char _start[], _end[];
+
+       /* XXX For now, register all boot data as permanent */
+
+       biosmem_register_boot_data(_kvtophys(&_start), _kvtophys(&_end), FALSE);
+
+       if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0)) {
+               biosmem_register_boot_data(mbi->cmdline,
+                                          mbi->cmdline
+                                          + strlen((void *)phystokv(mbi->cmdline)) + 1, FALSE);
+       }
+
+       if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
+               i = mbi->mods_count * sizeof(struct multiboot_raw_module);
+               biosmem_register_boot_data(mbi->mods_addr, mbi->mods_addr + i, FALSE);
+
+               tmp = phystokv(mbi->mods_addr);
+
+               for (i = 0; i < mbi->mods_count; i++) {
+                       mod = (struct multiboot_raw_module *)tmp + i;
+                       biosmem_register_boot_data(mod->mod_start, mod->mod_end, FALSE);
+
+                       if (mod->string != 0) {
+                               biosmem_register_boot_data(mod->string,
+                                                          mod->string
+                                                          + strlen((void *)phystokv(mod->string)) + 1,
+                                                          FALSE);
+                       }
+               }
+       }
+
+       if (mbi->flags & MULTIBOOT_LOADER_SHDR) {
+               tmp = mbi->shdr_num * mbi->shdr_size;
+               biosmem_register_boot_data(mbi->shdr_addr, mbi->shdr_addr + tmp, FALSE);
+
+               tmp = phystokv(mbi->shdr_addr);
+
+               for (i = 0; i < mbi->shdr_num; i++) {
+                       shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size));
+
+                       if ((shdr->type != ELF_SHT_SYMTAB)
+                           && (shdr->type != ELF_SHT_STRTAB))
+                               continue;
+
+                       biosmem_register_boot_data(shdr->addr, shdr->addr + shdr->size, FALSE);
+               }
+       }
+}
+
+#endif /* MACH_HYP */
+
 /*
  * Basic PC VM initialization.
  * Turns on paging and changes the kernel segments to use high linear addresses.
@@ -291,6 +356,7 @@ i386at_init(void)
 #ifdef MACH_HYP
        biosmem_xen_bootstrap();
 #else /* MACH_HYP */
+       register_boot_data((struct multiboot_raw_info *) &boot_info);
        biosmem_bootstrap((struct multiboot_raw_info *) &boot_info);
 #endif /* MACH_HYP */
 
@@ -619,11 +685,6 @@ resettodr(void)
        writetodc();
 }
 
-unsigned int pmap_free_pages(void)
-{
-       return vm_page_atop(phys_last_addr); /* XXX */
-}
-
 boolean_t
 init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
 {
@@ -646,15 +707,3 @@ pmap_grab_page(void)
                panic("Not enough memory to initialize Mach");
        return addr;
 }
-
-boolean_t pmap_valid_page(vm_offset_t x)
-{
-       /* XXX is this OK?  What does it matter for?  */
-       return (((phys_first_addr <= x) && (x < phys_last_addr))
-#ifndef MACH_HYP
-               && !(
-               ((boot_info.mem_lower * 1024) <= x) && 
-               (x < 1024*1024))
-#endif /* MACH_HYP */
-               );
-}
diff --git a/i386/include/mach/i386/asm.h b/i386/include/mach/i386/asm.h
index 4e3b589..45b848d 100644
--- a/i386/include/mach/i386/asm.h
+++ b/i386/include/mach/i386/asm.h
@@ -96,24 +96,24 @@
 #ifdef GPROF
 
#define MCOUNT         .data; gLB(9) .long 0; .text; lea LBb(x, 9),%edx; call mcount
-#define        ENTRY(x)        .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x) ; \
+#define        ENTRY(x)        .globl EXT(x); .type EXT(x), @function; .p2align TEXT_ALIGN; LEXT(x) ; \
                        pushl %ebp; movl %esp, %ebp; MCOUNT; popl %ebp;
-#define        ENTRY2(x,y)     .globl EXT(x); .globl EXT(y); \
+#define        ENTRY2(x,y)     .globl EXT(x); .type EXT(x), @function; .globl EXT(y); .type EXT(y), @function; \
                        .p2align TEXT_ALIGN; LEXT(x) LEXT(y)
-#define        ASENTRY(x)      .globl x; .p2align TEXT_ALIGN; gLB(x) ; \
+#define        ASENTRY(x)      .globl x; .type x, @function; .p2align TEXT_ALIGN; gLB(x) ; \
                        pushl %ebp; movl %esp, %ebp; MCOUNT; popl %ebp;
 #define        END(x)          .size x,.-x
 #else  /* GPROF */
 
 #define MCOUNT
-#define        ENTRY(x)        .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x)
-#define        ENTRY2(x,y)     .globl EXT(x); .globl EXT(y); \
+#define        ENTRY(x)        .globl EXT(x); .type EXT(x), @function; .p2align TEXT_ALIGN; LEXT(x)
+#define        ENTRY2(x,y)     .globl EXT(x); .type EXT(x), @function; .globl EXT(y); .type EXT(y), @function; \
                        .p2align TEXT_ALIGN; LEXT(x) LEXT(y)
-#define        ASENTRY(x)      .globl x; .p2align TEXT_ALIGN; gLB(x)
+#define        ASENTRY(x)      .globl x; .type x, @function; .p2align TEXT_ALIGN; gLB(x)
 #define        END(x)          .size x,.-x
 #endif /* GPROF */
 
-#define        Entry(x)        .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x)
+#define        Entry(x)        .globl EXT(x); .type EXT(x), @function; 
.p2align TEXT_ALIGN; LEXT(x)
 #define        DATA(x)         .globl EXT(x); .p2align DATA_ALIGN; LEXT(x)
 
 #endif /* _MACH_I386_ASM_H_ */
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index e362b45..096e6fd 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -83,6 +83,7 @@
 #include <i386/proc_reg.h>
 #include <i386/locore.h>
 #include <i386/model_dep.h>
+#include <i386at/biosmem.h>
 #include <i386at/model_dep.h>
 
 #ifdef MACH_PSEUDO_PHYS
@@ -158,9 +159,9 @@ vm_offset_t kernel_virtual_end;
 
 /*
  *     Index into pv_head table, its lock bits, and the modify/reference
- *     bits starting at phys_first_addr.
+ *     bits.
  */
-#define pa_index(pa)   (atop(pa - phys_first_addr))
+#define pa_index(pa)   vm_page_table_index(pa)
 
 #define pai_to_pvh(pai)                (&pv_head_table[pai])
 #define lock_pvh_pai(pai)      (bit_lock(pai, pv_lock_table))
@@ -326,12 +327,7 @@ lock_data_t        pmap_system_lock;
 
 #endif /* NCPUS > 1 */
 
-#define MAX_TBIS_SIZE  32              /* > this -> TBIA */ /* XXX */
-
 #ifdef MACH_PV_PAGETABLES
-#if 1
-#define INVALIDATE_TLB(pmap, s, e) hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL)
-#else
 #define INVALIDATE_TLB(pmap, s, e) do { \
        if (__builtin_constant_p((e) - (s)) \
                && (e) - (s) == PAGE_SIZE) \
@@ -339,26 +335,16 @@ lock_data_t       pmap_system_lock;
        else \
                hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \
 } while(0)
-#endif
 #else  /* MACH_PV_PAGETABLES */
-#if 0
 /* It is hard to know when a TLB flush becomes less expensive than a bunch of
  * invlpgs.  But it surely is more expensive than just one invlpg.  */
-#define INVALIDATE_TLB(pmap, s, e) { \
+#define INVALIDATE_TLB(pmap, s, e) do { \
        if (__builtin_constant_p((e) - (s)) \
                && (e) - (s) == PAGE_SIZE) \
-               invlpg_linear(s); \
+               invlpg_linear((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
        else \
                flush_tlb(); \
-}
-#else
-#define INVALIDATE_TLB(pmap, s, e) { \
-       (void) (pmap); \
-       (void) (s); \
-       (void) (e); \
-       flush_tlb(); \
-}
-#endif
+} while (0)
 #endif /* MACH_PV_PAGETABLES */
 
 
@@ -497,8 +483,8 @@ void ptep_check(ptep_t ptep)
  */
 vm_offset_t pmap_map(
        vm_offset_t     virt,
-       vm_offset_t     start,
-       vm_offset_t     end,
+       phys_addr_t     start,
+       phys_addr_t     end,
        int             prot)
 {
        int             ps;
@@ -514,14 +500,14 @@ vm_offset_t pmap_map(
 
 /*
  *     Back-door routine for mapping kernel VM at initialization.
- *     Useful for mapping memory outside the range
- *     [phys_first_addr, phys_last_addr) (i.e., devices).
+ *     Useful for mapping memory outside the range of direct mapped
+ *     physical memory (i.e., devices).
  *     Otherwise like pmap_map.
  */
 vm_offset_t pmap_map_bd(
        vm_offset_t     virt,
-       vm_offset_t     start,
-       vm_offset_t     end,
+       phys_addr_t     start,
+       phys_addr_t     end,
        vm_prot_t       prot)
 {
        pt_entry_t      template;
@@ -615,8 +601,8 @@ void pmap_bootstrap(void)
         * mapped into the kernel address space,
         * and extends to a stupid arbitrary limit beyond that.
         */
-       kernel_virtual_start = phystokv(phys_last_addr);
-       kernel_virtual_end = phystokv(phys_last_addr) + VM_KERNEL_MAP_SIZE;
+       kernel_virtual_start = phystokv(biosmem_directmap_end());
+       kernel_virtual_end = kernel_virtual_start + VM_KERNEL_MAP_SIZE;
 
        if (kernel_virtual_end < kernel_virtual_start
                        || kernel_virtual_end > VM_MAX_KERNEL_ADDRESS)
@@ -707,8 +693,7 @@ void pmap_bootstrap(void)
                pt_entry_t global = CPU_HAS_FEATURE(CPU_FEATURE_PGE) ? INTEL_PTE_GLOBAL : 0;
 
                /*
-                * Map virtual memory for all known physical memory, 1-1,
-                * from phys_first_addr to phys_last_addr.
+                * Map virtual memory for all directly mappable physical 
memory, 1-1,
                 * Make any mappings completely in the kernel's text segment 
read-only.
                 *
                 * Also allocate some additional all-null page tables afterwards
@@ -717,7 +702,7 @@ void pmap_bootstrap(void)
                 * to allocate new kernel page tables later.
                 * XX fix this
                 */
-               for (va = phystokv(phys_first_addr); va >= phystokv(phys_first_addr) && va < kernel_virtual_end; )
+               for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; )
                {
                        pt_entry_t *pde = kernel_page_dir + lin2pdenum(kvtolin(va));
                        pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page());
@@ -728,7 +713,7 @@ void pmap_bootstrap(void)
                                | INTEL_PTE_VALID | INTEL_PTE_WRITE);
 
                        /* Initialize the page table.  */
-                       for (pte = ptable; (va < phystokv(phys_last_addr)) && (pte < ptable+NPTES); pte++)
+                       for (pte = ptable; (va < phystokv(biosmem_directmap_end())) && (pte < ptable+NPTES); pte++)
                        {
                                if ((pte - ptable) < ptenum(va))
                                {
@@ -906,13 +891,20 @@ pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
 {
        pmap_mapwindow_t *map;
 
+       assert(entry != 0);
+
        /* Find an empty one.  */
        for (map = &mapwindows[0]; map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]; map++)
                if (!(*map->entry))
                        break;
        assert(map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]);
 
+#ifdef MACH_PV_PAGETABLES
+       if (!hyp_mmu_update_pte(kv_to_ma(map->entry), pa_to_ma(entry)))
+               panic("pmap_get_mapwindow");
+#else /* MACH_PV_PAGETABLES */
        WRITE_PTE(map->entry, entry);
+#endif /* MACH_PV_PAGETABLES */
        return map;
 }
 
@@ -921,7 +913,12 @@ pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
  */
 void pmap_put_mapwindow(pmap_mapwindow_t *map)
 {
+#ifdef MACH_PV_PAGETABLES
+       if (!hyp_mmu_update_pte(kv_to_ma(map->entry), 0))
+               panic("pmap_put_mapwindow");
+#else /* MACH_PV_PAGETABLES */
        WRITE_PTE(map->entry, 0);
+#endif /* MACH_PV_PAGETABLES */
        PMAP_UPDATE_TLBS(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
 }
 
@@ -940,7 +937,7 @@ void pmap_virtual_space(
  */
 void pmap_init(void)
 {
-       long                    npages;
+       unsigned long           npages;
        vm_offset_t             addr;
        vm_size_t               s;
 #if    NCPUS > 1
@@ -952,7 +949,7 @@ void pmap_init(void)
         *      the modify bit array, and the pte_page table.
         */
 
-       npages = atop(phys_last_addr - phys_first_addr);
+       npages = vm_page_table_size();
        s = (vm_size_t) (sizeof(struct pv_entry) * npages
                                + pv_lock_table_size(npages)
                                + npages);
@@ -1000,31 +997,16 @@ void pmap_init(void)
        pmap_initialized = TRUE;
 }
 
-#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
-
-boolean_t pmap_verify_free(vm_offset_t phys)
+static inline boolean_t
+valid_page(phys_addr_t addr)
 {
-       pv_entry_t      pv_h;
-       int             pai;
-       int             spl;
-       boolean_t       result;
+       struct vm_page *p;
 
-       assert(phys != vm_page_fictitious_addr);
        if (!pmap_initialized)
-               return(TRUE);
-
-       if (!pmap_valid_page(phys))
-               return(FALSE);
-
-       PMAP_WRITE_LOCK(spl);
-
-       pai = pa_index(phys);
-       pv_h = pai_to_pvh(pai);
-
-       result = (pv_h->pmap == PMAP_NULL);
-       PMAP_WRITE_UNLOCK(spl);
+               return FALSE;
 
-       return(result);
+       p = vm_page_lookup_pa(addr);
+       return (p != NULL);
 }
 
 /*
@@ -1049,12 +1031,12 @@ pmap_page_table_page_alloc(void)
         *      Allocate it now if it is missing.
         */
        if (pmap_object == VM_OBJECT_NULL)
-           pmap_object = vm_object_allocate(phys_last_addr - phys_first_addr);
+           pmap_object = vm_object_allocate(vm_page_table_size() * PAGE_SIZE);
 
        /*
         *      Allocate a VM page for the level 2 page table entries.
         */
-       while ((m = vm_page_grab(FALSE)) == VM_PAGE_NULL)
+       while ((m = vm_page_grab()) == VM_PAGE_NULL)
                VM_PAGE_WAIT((void (*)()) 0);
 
        /*
@@ -1232,7 +1214,7 @@ pmap_t pmap_create(vm_size_t size)
 void pmap_destroy(pmap_t p)
 {
        pt_entry_t      *pdep;
-       vm_offset_t     pa;
+       phys_addr_t     pa;
        int             c, s;
        vm_page_t       m;
 
@@ -1327,9 +1309,9 @@ void pmap_remove_range(
        pt_entry_t              *epte)
 {
        pt_entry_t              *cpte;
-       int                     num_removed, num_unwired;
-       int                     pai;
-       vm_offset_t             pa;
+       unsigned long           num_removed, num_unwired;
+       unsigned long           pai;
+       phys_addr_t             pa;
 #ifdef MACH_PV_PAGETABLES
        int n, ii = 0;
        struct mmu_update update[HYP_BATCH_MMU_UPDATES];
@@ -1519,13 +1501,13 @@ void pmap_remove(
  *             page.
  */
 void pmap_page_protect(
-       vm_offset_t     phys,
+       phys_addr_t     phys,
        vm_prot_t       prot)
 {
        pv_entry_t              pv_h, prev;
        pv_entry_t              pv_e;
        pt_entry_t              *pte;
-       int                     pai;
+       unsigned long           pai;
        pmap_t                  pmap;
        int                     spl;
        boolean_t               remove;
@@ -1791,17 +1773,18 @@ void pmap_protect(
 void pmap_enter(
        pmap_t                  pmap,
        vm_offset_t             v,
-       vm_offset_t             pa,
+       phys_addr_t             pa,
        vm_prot_t               prot,
        boolean_t               wired)
 {
+       boolean_t               is_physmem;
        pt_entry_t              *pte;
        pv_entry_t              pv_h;
-       int                     i, pai;
+       unsigned long           i, pai;
        pv_entry_t              pv_e;
        pt_entry_t              template;
        int                     spl;
-       vm_offset_t             old_pa;
+       phys_addr_t             old_pa;
 
        assert(pa != vm_page_fictitious_addr);
 if (pmap_debug) printf("pmap(%lx, %lx)\n", v, pa);
@@ -1926,6 +1909,11 @@ Retry:
            continue;
        }
 
+       if (vm_page_ready())
+               is_physmem = (vm_page_lookup_pa(pa) != NULL);
+       else
+               is_physmem = (pa < biosmem_directmap_end());
+
        /*
         *      Special case if the physical page is already mapped
         *      at this address.
@@ -1947,7 +1935,7 @@ Retry:
            if (prot & VM_PROT_WRITE)
                template |= INTEL_PTE_WRITE;
            if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
-               && pa >= phys_last_addr)
+               && !is_physmem)
                template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
            if (wired)
                template |= INTEL_PTE_WIRED;
@@ -2059,7 +2047,7 @@ Retry:
            if (prot & VM_PROT_WRITE)
                template |= INTEL_PTE_WRITE;
            if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
-               && pa >= phys_last_addr)
+               && !is_physmem)
                template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
            if (wired)
                template |= INTEL_PTE_WIRED;
@@ -2145,20 +2133,20 @@ void pmap_change_wiring(
  *             with the given map/virtual_address pair.
  */
 
-vm_offset_t pmap_extract(
+phys_addr_t pmap_extract(
        pmap_t          pmap,
        vm_offset_t     va)
 {
        pt_entry_t      *pte;
-       vm_offset_t     pa;
+       phys_addr_t     pa;
        int             spl;
 
        SPLVM(spl);
        simple_lock(&pmap->lock);
        if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
-           pa = (vm_offset_t) 0;
+           pa = 0;
        else if (!(*pte & INTEL_PTE_VALID))
-           pa = (vm_offset_t) 0;
+           pa = 0;
        else
            pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
        simple_unlock(&pmap->lock);
@@ -2199,7 +2187,7 @@ void pmap_collect(pmap_t p)
 {
        pt_entry_t              *pdp, *ptp;
        pt_entry_t              *eptp;
-       vm_offset_t             pa;
+       phys_addr_t             pa;
        int                     spl, wired;
 
        if (p == PMAP_NULL)
@@ -2415,13 +2403,13 @@ pmap_pageable(
  */
 void
 phys_attribute_clear(
-       vm_offset_t     phys,
+       phys_addr_t     phys,
        int             bits)
 {
        pv_entry_t              pv_h;
        pv_entry_t              pv_e;
        pt_entry_t              *pte;
-       int                     pai;
+       unsigned long           pai;
        pmap_t                  pmap;
        int                     spl;
 
@@ -2499,13 +2487,13 @@ phys_attribute_clear(
  */
 boolean_t
 phys_attribute_test(
-       vm_offset_t     phys,
+       phys_addr_t     phys,
        int             bits)
 {
        pv_entry_t              pv_h;
        pv_entry_t              pv_e;
        pt_entry_t              *pte;
-       int                     pai;
+       unsigned long           pai;
        pmap_t                  pmap;
        int                     spl;
 
@@ -2587,7 +2575,7 @@ phys_attribute_test(
  *     Clear the modify bits on the specified physical page.
  */
 
-void pmap_clear_modify(vm_offset_t phys)
+void pmap_clear_modify(phys_addr_t phys)
 {
        phys_attribute_clear(phys, PHYS_MODIFIED);
 }
@@ -2599,7 +2587,7 @@ void pmap_clear_modify(vm_offset_t phys)
  *     by any physical maps.
  */
 
-boolean_t pmap_is_modified(vm_offset_t phys)
+boolean_t pmap_is_modified(phys_addr_t phys)
 {
        return (phys_attribute_test(phys, PHYS_MODIFIED));
 }
@@ -2610,7 +2598,7 @@ boolean_t pmap_is_modified(vm_offset_t phys)
  *     Clear the reference bit on the specified physical page.
  */
 
-void pmap_clear_reference(vm_offset_t phys)
+void pmap_clear_reference(phys_addr_t phys)
 {
        phys_attribute_clear(phys, PHYS_REFERENCED);
 }
@@ -2622,7 +2610,7 @@ void pmap_clear_reference(vm_offset_t phys)
  *     by any physical maps.
  */
 
-boolean_t pmap_is_referenced(vm_offset_t phys)
+boolean_t pmap_is_referenced(phys_addr_t phys)
 {
        return (phys_attribute_test(phys, PHYS_REFERENCED));
 }
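
[The vm_offset_t to phys_addr_t conversions above separate physical addresses from virtual ones. A minimal sketch of the distinction, assuming the usual x86 PAE layout; the exact typedefs are not part of this diff:

    /* Assumed layout: virtual addresses stay 32-bit, but with PAE a
     * physical address can exceed 4 GiB, so a wider type is required. */
    typedef unsigned long       vm_offset_t;    /* virtual address or offset */
    #if PAE
    typedef unsigned long long  phys_addr_t;    /* up to 52-bit physical address */
    #else  /* PAE */
    typedef unsigned long       phys_addr_t;
    #endif /* PAE */
]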
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 382cd5f..e6a3ede 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -64,11 +64,7 @@
  *     i386/i486 Page Table Entry
  */
 
-#if PAE
-typedef unsigned long long     pt_entry_t;
-#else  /* PAE */
-typedef unsigned int   pt_entry_t;
-#endif /* PAE */
+typedef phys_addr_t pt_entry_t;
 #define PT_ENTRY_NULL  ((pt_entry_t *) 0)
 
 #endif /* __ASSEMBLER__ */
@@ -447,19 +443,19 @@ extern void pmap_unmap_page_zero (void);
 /*
  *  pmap_zero_page zeros the specified (machine independent) page.
  */
-extern void pmap_zero_page (vm_offset_t);
+extern void pmap_zero_page (phys_addr_t);
 
 /*
  *  pmap_copy_page copies the specified (machine independent) pages.
  */
-extern void pmap_copy_page (vm_offset_t, vm_offset_t);
+extern void pmap_copy_page (phys_addr_t, phys_addr_t);
 
 /*
  *  kvtophys(addr)
  *
  *  Convert a kernel virtual address to a physical address
  */
-extern vm_offset_t kvtophys (vm_offset_t);
+extern phys_addr_t kvtophys (vm_offset_t);
 
 void pmap_remove_range(
        pmap_t                  pmap,
diff --git a/i386/xen/xen.c b/i386/xen/xen.c
index 8b015c4..d10ecf3 100644
--- a/i386/xen/xen.c
+++ b/i386/xen/xen.c
@@ -58,7 +58,7 @@ void hypclock_machine_intr(int old_ipl, void *ret_addr, struct i386_interrupt_st
 }
 
 void hyp_p2m_init(void) {
-       unsigned long nb_pfns = atop(phys_last_addr);
+       unsigned long nb_pfns = vm_page_table_size();
 #ifdef MACH_PSEUDO_PHYS
 #define P2M_PAGE_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+       unsigned long *l3 = (unsigned long *)phystokv(pmap_grab_page()), *l2 = NULL;
diff --git a/ipc/ipc_init.c b/ipc/ipc_init.c
index 5ed800f..8e628ad 100644
--- a/ipc/ipc_init.c
+++ b/ipc/ipc_init.c
@@ -111,7 +111,7 @@ ipc_init(void)
        vm_offset_t min, max;
 
        kmem_submap(ipc_kernel_map, kernel_map, &min, &max,
-                   ipc_kernel_map_size, TRUE);
+                   ipc_kernel_map_size);
 
        ipc_host_init();
 }
diff --git a/ipc/mach_debug.c b/ipc/mach_debug.c
index efb07a4..6ddc89b 100644
--- a/ipc/mach_debug.c
+++ b/ipc/mach_debug.c
@@ -257,6 +257,9 @@ mach_port_kernel_object(
        ipc_port_t port;
        kern_return_t kr;
 
+       if (space == IS_NULL)
+               return KERN_INVALID_TASK;
+
        kr = ipc_right_lookup_read(space, name, &entry);
        if (kr != KERN_SUCCESS)
                return kr;
diff --git a/kern/lock.c b/kern/lock.c
index 1daf1b4..a4b8252 100644
--- a/kern/lock.c
+++ b/kern/lock.c
@@ -175,6 +175,7 @@ void _simple_lock(
        l->lock_data = 1;
 
        info = &simple_locks_info[simple_locks_taken++];
+       barrier();
        info->l = l;
        info->expr = expression;
        info->loc = location;
@@ -193,6 +194,7 @@ boolean_t _simple_lock_try(
        l->lock_data = 1;
 
        info = &simple_locks_info[simple_locks_taken++];
+       barrier();
        info->l = l;
        info->expr = expression;
        info->loc = location;
@@ -219,6 +221,7 @@ void simple_unlock(
 
                simple_locks_info[i] = simple_locks_info[simple_locks_taken-1];
        }
+       barrier();
        simple_locks_taken--;
        simple_locks_info[simple_locks_taken] = (struct simple_locks_info) {0};
 }
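
[The barrier() calls added above keep the compiler from reordering the simple_locks_taken update with the writes to the matching info record. A minimal sketch of such a barrier, assuming the usual GCC idiom; the actual definition is not shown in this diff:

    /* Pure compiler barrier: emits no instructions, but the compiler
     * may not move memory accesses across it. */
    #define barrier()      __asm__ __volatile__ ("" : : : "memory")
]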
diff --git a/kern/slab.c b/kern/slab.c
index 1f8e000..d4ef847 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -370,7 +370,7 @@ kmem_pagealloc_physmem(vm_size_t size)
     assert(size == PAGE_SIZE);
 
     for (;;) {
-        page = vm_page_grab_contig(size, VM_PAGE_SEL_DIRECTMAP);
+        page = vm_page_grab();
 
         if (page != NULL)
             break;
@@ -389,7 +389,7 @@ kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size)
     assert(size == PAGE_SIZE);
     page = vm_page_lookup_pa(kvtophys(addr));
     assert(page != NULL);
-    vm_page_free_contig(page, size);
+    vm_page_release(page, FALSE, FALSE);
 }
 
 static vm_offset_t
diff --git a/kern/startup.c b/kern/startup.c
index c87cbb1..19bd7bf 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -78,7 +78,6 @@ boolean_t reboot_on_panic = TRUE;
 #endif /* NCPUS > 1 */
 
 /* XX */
-extern vm_offset_t phys_first_addr, phys_last_addr;
 extern char *kernel_cmdline;
 
 /*
diff --git a/kern/strings.c b/kern/strings.c
index 64410d9..71c9905 100644
--- a/kern/strings.c
+++ b/kern/strings.c
@@ -179,6 +179,7 @@ strlen(
  *     The return value is a pointer to the "s" string.
  */
 
+#if 0
 void *
 memset(
        void *_s, int c, size_t n)
@@ -191,3 +192,105 @@ memset(
 
        return _s;
 }
+#endif
+
+/*
+ * Abstract:
+ *     strchr returns a pointer to the first occurrence of the character
+ *     "c" in the string "s". If "c" is not found, return NULL.
+ */
+char *
+strchr(
+       const char *s,
+       int c)
+{
+       while (*s != c) {
+               if (*s == '\0') {
+                       return NULL;
+               }
+
+               s++;
+       }
+
+       return (char *)s;
+}
+
+/*
+ * Abstract:
+ *     strsep extracts tokens from strings. If "*sp" is NULL, return NULL
+ *     and do nothing. Otherwise, find the first token in string "*sp".
+ *     Tokens are delimited by characters in the string "delim". If no
+ *     delimiter is found, the token is the entire string "*sp", and "*sp"
+ *     is made NULL. Otherwise, overwrite the delimiter with a null byte,
+ *     and make "*sp" point past it.
+ */
+char *
+strsep(
+       char **sp,
+       const char *delim)
+{
+       const char *d;
+       char *s, *t;
+
+       s = t = *sp;
+
+       if (s == NULL) {
+               return NULL;
+       }
+
+       for (;;) {
+               if (*s == '\0') {
+                       *sp = NULL;
+                       return t;
+               }
+
+               d = delim;
+
+               for (;;) {
+                       if (*d == '\0') {
+                               break;
+                       }
+
+                       if (*d == *s) {
+                               *s = '\0';
+                               *sp = s + 1;
+                               return t;
+                       }
+
+                       d++;
+               }
+
+               s++;
+       }
+}
+
+/*
+ * Abstract:
+ *     strstr returns a pointer to the first occurrence of the substring
+ *     "find" in the string "s". If no substring was found, return NULL.
+ */
+char *
+strstr(
+       const char *s,
+       const char *find)
+{
+       size_t len;
+
+       len = strlen(find);
+
+       if (len == 0) {
+               return (char *)s;
+       }
+
+       for (;;) {
+               if (*s == '\0') {
+                       return NULL;
+               }
+
+               if (strncmp(s, find, len) == 0) {
+                       return (char *)s;
+               }
+
+               s++;
+       }
+}
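
[A usage sketch for the strsep added above, with a hypothetical caller and made-up option names, splitting a comma-separated option string in place:

    static void
    parse_opts(char *args)                     /* e.g. "noacpi,console=com0" */
    {
            char *tok;

            while ((tok = strsep(&args, ",")) != NULL) {
                    if (*tok == '\0')
                            continue;          /* skip empty fields */
                    /* handle tok, e.g. locate '=' with strchr(tok, '=') */
            }
    }
]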
diff --git a/kern/task.c b/kern/task.c
index 673a437..7dff124 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -104,7 +104,7 @@ kern_return_t task_create(
        } else {
                new_task->map = vm_map_create(pmap_create(0),
                                        round_page(VM_MIN_ADDRESS),
-                                       trunc_page(VM_MAX_ADDRESS), TRUE);
+                                       trunc_page(VM_MAX_ADDRESS));
                vm_map_set_name(new_task->map, new_task->name);
        }
 
diff --git a/kern/thread.c b/kern/thread.c
index 7db1f3d..0ac7c53 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -342,7 +342,7 @@ void thread_init(void)
        /* thread_template.sched_stamp (later) */
 
        thread_template.recover = (vm_offset_t) 0;
-       thread_template.vm_privilege = FALSE;
+       thread_template.vm_privilege = 0;
 
        thread_template.user_stop_count = 1;
 
@@ -2233,11 +2233,11 @@ thread_wire(
        thread_lock(thread);
 
        if (wired) {
-           thread->vm_privilege = TRUE;
+           thread->vm_privilege = 1;
            stack_privilege(thread);
        }
        else {
-           thread->vm_privilege = FALSE;
+           thread->vm_privilege = 0;
 /*XXX      stack_unprivilege(thread); */
            thread->stack_privilege = 0;
        }
diff --git a/kern/thread.h b/kern/thread.h
index 7106fd2..f0ed71a 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -77,7 +77,6 @@ struct thread {
                struct {
                        unsigned        state:16;
                        unsigned        wake_active:1;
-                       unsigned        vm_privilege:1;
                        unsigned        active:1;
                };
                event_t event_key;
@@ -146,8 +145,8 @@ struct thread {
        /* VM global variables */
 
        vm_offset_t     recover;        /* page fault recovery (copyin/out) */
-       /* Defined above */
-       /* boolean_t    vm_privilege;      Can use reserved memory? */
+       unsigned int vm_privilege;      /* Can use reserved memory?
+                                          Implemented as a counter */
 
        /* User-visible scheduling state */
        int             user_stop_count;        /* outstanding stops */
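
[With vm_privilege now a counter instead of a one-bit flag, privilege elevation can nest without saving and restoring the previous value. A rough illustration with hypothetical callers, not taken from the patch:

    /* Each level increments on entry and decrements on exit; the thread
     * only drops back to unprivileged at the outermost decrement. */
    static void inner(void)
    {
            current_thread()->vm_privilege++;  /* 1 -> 2 */
            /* ... allocate while privileged ... */
            current_thread()->vm_privilege--;  /* 2 -> 1 */
    }

    static void outer(void)
    {
            current_thread()->vm_privilege++;  /* 0 -> 1 */
            inner();
            current_thread()->vm_privilege--;  /* 1 -> 0 */
    }
]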
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c
index 74126eb..c1d922b 100644
--- a/linux/dev/glue/block.c
+++ b/linux/dev/glue/block.c
@@ -50,6 +50,7 @@
 #include <mach/notify.h>
 
 #include <kern/kalloc.h>
+#include <kern/list.h>
 
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
@@ -97,7 +98,7 @@ struct temp_data
   struct inode inode;
   struct file file;
   struct request req;
-  queue_head_t pages;
+  struct list pages;
 };
 
 /* One of these exists for each
@@ -302,11 +303,11 @@ alloc_buffer (int size)
 
   if (! linux_auto_config)
     {
-      while ((m = vm_page_grab (FALSE)) == 0)
+      while ((m = vm_page_grab ()) == 0)
        VM_PAGE_WAIT (0);
       d = current_thread ()->pcb->data;
       assert (d);
-      queue_enter (&d->pages, m, vm_page_t, pageq);
+      list_insert_tail (&d->pages, &m->node);
       return (void *) phystokv(m->phys_addr);
     }
   return (void *) __get_free_pages (GFP_KERNEL, 0, ~0UL);
@@ -317,7 +318,7 @@ static void
 free_buffer (void *p, int size)
 {
   struct temp_data *d;
-  vm_page_t m;
+  vm_page_t m, tmp;
 
   assert (size <= PAGE_SIZE);
 
@@ -325,11 +326,11 @@ free_buffer (void *p, int size)
     {
       d = current_thread ()->pcb->data;
       assert (d);
-      queue_iterate (&d->pages, m, vm_page_t, pageq)
+      list_for_each_entry_safe (&d->pages, m, tmp, node)
        {
          if (phystokv(m->phys_addr) == (vm_offset_t) p)
            {
-             queue_remove (&d->pages, m, vm_page_t, pageq);
+             list_remove (&m->node);
              VM_PAGE_FREE (m);
              return;
            }
@@ -992,7 +993,7 @@ check:
 #define DECL_DATA      struct temp_data td
 #define INIT_DATA()                    \
 {                                      \
-  queue_init (&td.pages);              \
+  list_init (&td.pages);               \
   td.inode.i_rdev = bd->dev;           \
   td.file.f_mode = bd->mode;           \
   td.file.f_flags = bd->flags;         \
@@ -1046,7 +1047,7 @@ device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
     minor <<= gd->minor_shift;
   dev = MKDEV (major, minor);
 
-  queue_init (&td.pages);
+  list_init (&td.pages);
   current_thread ()->pcb->data = &td;
 
   /* Check partition.  */
@@ -1417,7 +1418,7 @@ device_read (void *d, ipc_port_t reply_port,
   boolean_t dirty;
   int resid, amt;
   io_return_t err = 0;
-  queue_head_t pages;
+  struct list pages;
   vm_map_copy_t copy;
   vm_offset_t addr, offset, alloc_offset, o;
   vm_object_t object;
@@ -1460,7 +1461,7 @@ device_read (void *d, ipc_port_t reply_port,
   if (err)
     goto out;
 
-  queue_init (&pages);
+  list_init (&pages);
 
   while (resid)
     {
@@ -1471,7 +1472,7 @@ device_read (void *d, ipc_port_t reply_port,
 
       /* Map any pages left from previous operation.  */
       o = trunc_page (offset);
-      queue_iterate (&pages, m, vm_page_t, pageq)
+      list_for_each_entry (&pages, m, node)
        {
          pmap_enter (vm_map_pmap (device_io_map),
                      addr + o - trunc_page (offset),
@@ -1483,11 +1484,11 @@ device_read (void *d, ipc_port_t reply_port,
       /* Allocate and map pages.  */
       while (alloc_offset < trunc_page (offset) + len)
        {
-         while ((m = vm_page_grab (FALSE)) == 0)
+         while ((m = vm_page_grab ()) == 0)
            VM_PAGE_WAIT (0);
          assert (! m->active && ! m->inactive);
          m->busy = TRUE;
-         queue_enter (&pages, m, vm_page_t, pageq);
+         list_insert_tail (&pages, &m->node);
          pmap_enter (vm_map_pmap (device_io_map),
                      addr + alloc_offset - trunc_page (offset),
                      m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE);
@@ -1529,9 +1530,9 @@ device_read (void *d, ipc_port_t reply_port,
       vm_object_lock (object);
       while (o < trunc_page (offset))
        {
-         m = (vm_page_t) queue_first (&pages);
-         assert (! queue_end (&pages, (queue_entry_t) m));
-         queue_remove (&pages, m, vm_page_t, pageq);
+         m = list_first_entry (&pages, struct vm_page, node);
+         assert (! list_end (&pages, &m->node));
+         list_remove (&m->node);
          assert (m->busy);
          vm_page_lock_queues ();
          if (dirty)
@@ -1557,7 +1558,7 @@ device_read (void *d, ipc_port_t reply_port,
   /* Delete kernel buffer.  */
   vm_map_remove (device_io_map, addr, addr + size);
 
-  assert (queue_empty (&pages));
+  assert (list_empty (&pages));
 
 out:
   if (! err)
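
[The list_for_each_entry_safe form used in free_buffer above is the variant intended for loops that may remove the current element: it keeps a second cursor so traversal survives the removal. A small sketch of the pattern; should_free is a hypothetical predicate:

    struct vm_page *m, *tmp;

    /* "tmp" is read ahead of the loop body, so list_remove() on "m"
     * does not break the iteration. */
    list_for_each_entry_safe (&d->pages, m, tmp, node)
      {
        if (should_free (m))
          {
            list_remove (&m->node);
            VM_PAGE_FREE (m);
          }
      }
]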
diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c
index 1573273..6b9cadd 100644
--- a/linux/dev/glue/net.c
+++ b/linux/dev/glue/net.c
@@ -428,62 +428,43 @@ device_write (void *d, ipc_port_t reply_port,
              int *bytes_written)
 {
   unsigned char *p;
-  int i, amt, skblen, s;
+  int i, s;
   vm_map_copy_t copy = (vm_map_copy_t) data;
+  char *map_data;
+  vm_offset_t map_addr;
+  vm_size_t map_size;
   struct net_data *nd = d;
   struct linux_device *dev = nd->dev;
   struct sk_buff *skb;
+  kern_return_t kr;
 
   if (count == 0 || count > dev->mtu + dev->hard_header_len)
     return D_INVALID_SIZE;
 
   /* Allocate a sk_buff.  */
-  amt = PAGE_SIZE - (copy->offset & PAGE_MASK);
-  skblen = (amt >= count) ? 0 : count;
-  skb = dev_alloc_skb (skblen);
+  skb = dev_alloc_skb (count);
   if (!skb)
     return D_NO_MEMORY;
 
-  /* Copy user data.  This is only required if it spans multiple pages.  */
-  if (skblen == 0)
-    {
-      assert (copy->cpy_npages == 1);
-
-      skb->copy = copy;
-      skb->data = ((void *) phystokv(copy->cpy_page_list[0]->phys_addr)
-                  + (copy->offset & PAGE_MASK));
-      skb->len = count;
-      skb->head = skb->data;
-      skb->tail = skb->data + skb->len;
-      skb->end = skb->tail;
-    }
-  else
-    {
-      skb->len = skblen;
-      skb->tail = skb->data + skblen;
-      skb->end = skb->tail;
-      
-      memcpy (skb->data,
-             ((void *) phystokv(copy->cpy_page_list[0]->phys_addr)
-              + (copy->offset & PAGE_MASK)),
-             amt);
-      count -= amt;
-      p = skb->data + amt;
-      for (i = 1; count > 0 && i < copy->cpy_npages; i++)
-       {
-         amt = PAGE_SIZE;
-         if (amt > count)
-           amt = count;
-         memcpy (p, (void *) phystokv(copy->cpy_page_list[i]->phys_addr), amt);
-         count -= amt;
-         p += amt;
-       }
+  /* Map user data.  */
+  kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+                          &map_addr, &map_size, copy, count);
 
-      assert (count == 0);
+  if (kr) {
+    dev_kfree_skb (skb, FREE_WRITE);
+    return D_NO_MEMORY;
+  }
 
-      vm_map_copy_discard (copy);
-    }
+  /* XXX The underlying physical pages of the mapping could be highmem,
+     for which drivers require the use of a bounce buffer.  */
+  memcpy (skb->data, map_data, count);
+  kmem_io_map_deallocate (device_io_map, map_addr, map_size);
+  vm_map_copy_discard (copy);
 
+  skb->len = count;
+  skb->head = skb->data;
+  skb->tail = skb->data + skb->len;
+  skb->end = skb->tail;
   skb->dev = dev;
   skb->reply = reply_port;
   skb->reply_type = reply_port_type;
diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c
index d69b3fc..3740c12 100644
--- a/linux/dev/init/main.c
+++ b/linux/dev/init/main.c
@@ -104,7 +104,7 @@ linux_init (void)
   /*
    * Initialize memory size.
    */
-  high_memory = phys_last_addr;
+  high_memory = vm_page_seg_end(VM_PAGE_SEL_DIRECTMAP);
   init_IRQ ();
   linux_sched_init ();
 
diff --git a/version.m4 b/version.m4
index 3a7512a..330aa15 100644
--- a/version.m4
+++ b/version.m4
@@ -1,4 +1,4 @@
 m4_define([AC_PACKAGE_NAME],[GNU Mach])
-m4_define([AC_PACKAGE_VERSION],[1.7+git20160809])
+m4_define([AC_PACKAGE_VERSION],[1.7+git20160921])
 m4_define([AC_PACKAGE_BUGREPORT],address@hidden)
 m4_define([AC_PACKAGE_TARNAME],[gnumach])
diff --git a/vm/pmap.h b/vm/pmap.h
index 9bbcdc3..2201b44 100644
--- a/vm/pmap.h
+++ b/vm/pmap.h
@@ -65,8 +65,6 @@
 
 /* During VM initialization, steal a chunk of memory.  */
 extern vm_offset_t     pmap_steal_memory(vm_size_t);
-/* During VM initialization, report remaining unused physical pages.  */
-extern unsigned int    pmap_free_pages(void);
 /* Initialization, after kernel runs in virtual memory.  */
 extern void            pmap_init(void);
 
@@ -75,14 +73,10 @@ extern void         pmap_init(void);
  *     If machine/pmap.h defines MACHINE_PAGES, it must implement
  *     the above functions.  The pmap module has complete control.
  *     Otherwise, it must implement
- *             pmap_free_pages
  *             pmap_virtual_space
  *             pmap_init
  *     and vm/vm_resident.c implements pmap_steal_memory using
- *     pmap_free_pages, pmap_virtual_space, and pmap_enter.
- *
- *     pmap_free_pages may over-estimate the number of unused physical pages.
- *     However, for best performance pmap_free_pages should be accurate.
+ *     pmap_virtual_space and pmap_enter.
  */
 
 /* During VM initialization, report virtual space available for the kernel.  */
@@ -106,7 +100,7 @@ extern void pmap_reference(pmap_t pmap);
 extern void pmap_destroy(pmap_t pmap);
 
 /* Enter a mapping */
-extern void pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa,
+extern void pmap_enter(pmap_t pmap, vm_offset_t va, phys_addr_t pa,
                       vm_prot_t prot, boolean_t wired);
 
 
@@ -134,7 +128,7 @@ extern void         pmap_deactivate(pmap_t, thread_t, int);
  */
 
 /* Restrict access to page. */
-void pmap_page_protect(vm_offset_t pa, vm_prot_t prot);
+void pmap_page_protect(phys_addr_t pa, vm_prot_t prot);
 
 /*
  *     Routines to manage reference/modify bits based on
@@ -143,24 +137,24 @@ void pmap_page_protect(vm_offset_t pa, vm_prot_t prot);
  */
 
 /* Clear reference bit */
-void pmap_clear_reference(vm_offset_t pa);
+void pmap_clear_reference(phys_addr_t pa);
 
 /* Return reference bit */
 #ifndef pmap_is_referenced
-boolean_t pmap_is_referenced(vm_offset_t pa);
+boolean_t pmap_is_referenced(phys_addr_t pa);
 #endif /* pmap_is_referenced */
 
 /* Clear modify bit */
-void pmap_clear_modify(vm_offset_t pa);
+void pmap_clear_modify(phys_addr_t pa);
 
 /* Return modify bit */
-boolean_t pmap_is_modified(vm_offset_t pa);
+boolean_t pmap_is_modified(phys_addr_t pa);
 
 /*
  *     Sundry required routines
  */
 /* Return a virtual-to-physical mapping, if possible.  */
-extern vm_offset_t     pmap_extract(pmap_t, vm_offset_t);
+extern phys_addr_t     pmap_extract(pmap_t, vm_offset_t);
 /* Perform garbage collection, if any.  */
 extern void            pmap_collect(pmap_t);
 /* Specify pageability.  */
@@ -186,8 +180,6 @@ extern kern_return_t        pmap_attribute(void);
  */
 extern vm_offset_t pmap_grab_page (void);
 
-extern boolean_t pmap_valid_page(vm_offset_t x);
-
 /*
  *      Make the specified pages (by pmap, offset)
  *      pageable (or not) as requested.
@@ -200,14 +192,14 @@ extern void pmap_pageable(
 
 /*
  *      Back-door routine for mapping kernel VM at initialization.
- *      Useful for mapping memory outside the range
- *      [phys_first_addr, phys_last_addr) (i.e., devices).
+ *      Useful for mapping memory outside the range of direct mapped
+ *      physical memory (i.e., devices).
  *      Otherwise like pmap_map.
  */
 extern vm_offset_t pmap_map_bd(
         vm_offset_t virt,
-        vm_offset_t start,
-        vm_offset_t end,
+        phys_addr_t start,
+        phys_addr_t end,
         vm_prot_t prot);
 
 /*
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index 68afc59..99381ef 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -423,7 +423,7 @@ vm_fault_return_t vm_fault_page(
                                         * need to allocate a real page.
                                         */
 
-                                       real_m = vm_page_grab(!object->internal);
+                                       real_m = vm_page_grab();
                                        if (real_m == VM_PAGE_NULL) {
                                                vm_fault_cleanup(object, first_m);
                                                return(VM_FAULT_MEMORY_SHORTAGE);
@@ -607,7 +607,7 @@ vm_fault_return_t vm_fault_page(
                                 *      won't block for pages.
                                 */
 
-                               if (m->fictitious && !vm_page_convert(&m, FALSE)) {
+                               if (m->fictitious && !vm_page_convert(&m)) {
                                        VM_PAGE_FREE(m);
                                        vm_fault_cleanup(object, first_m);
                                        return(VM_FAULT_MEMORY_SHORTAGE);
@@ -725,7 +725,7 @@ vm_fault_return_t vm_fault_page(
                        assert(m->object == object);
                        first_m = VM_PAGE_NULL;
 
-                       if (m->fictitious && !vm_page_convert(&m, !object->internal)) {
+                       if (m->fictitious && !vm_page_convert(&m)) {
                                VM_PAGE_FREE(m);
                                vm_fault_cleanup(object, VM_PAGE_NULL);
                                return(VM_FAULT_MEMORY_SHORTAGE);
@@ -810,7 +810,7 @@ vm_fault_return_t vm_fault_page(
                        /*
                         *      Allocate a page for the copy
                         */
-                       copy_m = vm_page_grab(!first_object->internal);
+                       copy_m = vm_page_grab();
                        if (copy_m == VM_PAGE_NULL) {
                                RELEASE_PAGE(m);
                                vm_fault_cleanup(object, first_m);
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index 9c0a20b..81bb153 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -778,8 +778,7 @@ kmem_submap(
        vm_map_t        parent,
        vm_offset_t     *min, 
        vm_offset_t     *max,
-       vm_size_t       size,
-       boolean_t       pageable)
+       vm_size_t       size)
 {
        vm_offset_t addr;
        kern_return_t kr;
@@ -802,7 +801,7 @@ kmem_submap(
                panic("kmem_submap");
 
        pmap_reference(vm_map_pmap(parent));
-       vm_map_setup(map, vm_map_pmap(parent), addr, addr + size, pageable);
+       vm_map_setup(map, vm_map_pmap(parent), addr, addr + size);
        kr = vm_map_submap(parent, addr, addr + size, map);
        if (kr != KERN_SUCCESS)
                panic("kmem_submap");
@@ -821,8 +820,7 @@ void kmem_init(
        vm_offset_t     start,
        vm_offset_t     end)
 {
-       vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end,
-                    FALSE);
+       vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end);
 
        /*
         *      Reserve virtual memory allocated up to this time.
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
index fb8ac7f..4bd89c4 100644
--- a/vm/vm_kern.h
+++ b/vm/vm_kern.h
@@ -57,7 +57,7 @@ extern kern_return_t  kmem_alloc_aligned(vm_map_t, vm_offset_t *, vm_size_t);
 extern void            kmem_free(vm_map_t, vm_offset_t, vm_size_t);
 
 extern void            kmem_submap(vm_map_t, vm_map_t, vm_offset_t *,
-                                   vm_offset_t *, vm_size_t, boolean_t);
+                                   vm_offset_t *, vm_size_t);
 
 extern kern_return_t   kmem_io_map_copyout(vm_map_t, vm_offset_t *,
                                            vm_offset_t *, vm_size_t *,
diff --git a/vm/vm_map.c b/vm/vm_map.c
index acac66e..249d18a 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -175,13 +175,11 @@ void vm_map_setup(
        vm_map_t        map,
        pmap_t          pmap,
        vm_offset_t     min, 
-       vm_offset_t     max,
-       boolean_t       pageable)
+       vm_offset_t     max)
 {
        vm_map_first_entry(map) = vm_map_to_entry(map);
        vm_map_last_entry(map)  = vm_map_to_entry(map);
        map->hdr.nentries = 0;
-       map->hdr.entries_pageable = pageable;
        rbtree_init(&map->hdr.tree);
        rbtree_init(&map->hdr.gap_tree);
 
@@ -211,8 +209,7 @@ void vm_map_setup(
 vm_map_t vm_map_create(
        pmap_t          pmap,
        vm_offset_t     min, 
-       vm_offset_t     max,
-       boolean_t       pageable)
+       vm_offset_t     max)
 {
        vm_map_t        result;
 
@@ -220,11 +217,53 @@ vm_map_t vm_map_create(
        if (result == VM_MAP_NULL)
                panic("vm_map_create");
 
-       vm_map_setup(result, pmap, min, max, pageable);
+       vm_map_setup(result, pmap, min, max);
 
        return(result);
 }
 
+void vm_map_lock(struct vm_map *map)
+{
+       lock_write(&map->lock);
+
+       /*
+        *      XXX Memory allocation may occur while a map is locked,
+        *      for example when clipping entries. If the system is running
+        *      low on memory, allocating may block until pages are
+        *      available. But if a map used by the default pager is
+        *      kept locked, a deadlock occurs.
+        *
+        *      This workaround temporarily elevates the current thread
+        *      VM privileges to avoid that particular deadlock, and does
+        *      so regardless of the map for convenience, and because it's
+        *      currently impossible to predict which map the default pager
+        *      may depend on.
+        *
+        *      This workaround isn't reliable, and only makes exhaustion
+        *      less likely. In particular pageout may cause lots of data
+        *      to be passed between the kernel and the pagers, often
+        *      in the form of large copy maps. Making the minimum
+        *      number of pages depend on the total number of pages
+        *      should make exhaustion even less likely.
+        */
+
+       if (current_thread()) {
+               current_thread()->vm_privilege++;
+               assert(current_thread()->vm_privilege != 0);
+       }
+
+       map->timestamp++;
+}
+
+void vm_map_unlock(struct vm_map *map)
+{
+       if (current_thread()) {
+               current_thread()->vm_privilege--;
+       }
+
+       lock_write_done(&map->lock);
+}
+
 /*
  *     vm_map_entry_create:    [ internal use only ]
  *
@@ -241,26 +280,8 @@ vm_map_entry_t _vm_map_entry_create(map_header)
        const struct vm_map_header *map_header;
 {
        vm_map_entry_t  entry;
-       boolean_t vm_privilege;
 
-       /*
-        *      XXX Map entry creation may occur while a map is locked,
-        *      for example when clipping entries. If the system is running
-        *      low on memory, allocating an entry may block until pages
-        *      are available. But if a map used by the default pager is
-        *      kept locked, a deadlock occurs.
-        *
-        *      This workaround temporarily elevates the current thread
-        *      VM privileges to avoid that particular deadlock, and does
-        *      so regardless of the map for convenience, and because it's
-        *      currently impossible to predict which map the default pager
-        *      may depend on.
-        */
-       vm_privilege = current_thread()->vm_privilege;
-       current_thread()->vm_privilege = TRUE;
        entry = (vm_map_entry_t) kmem_cache_alloc(&vm_map_entry_cache);
-       current_thread()->vm_privilege = vm_privilege;
-
        if (entry == VM_MAP_ENTRY_NULL)
                panic("vm_map_entry_create");
 
@@ -1900,7 +1921,7 @@ vm_map_copy_steal_pages(vm_map_copy_t copy)
                 *      Page was not stolen,  get a new
                 *      one and do the copy now.
                 */
-               while ((new_m = vm_page_grab(FALSE)) == VM_PAGE_NULL) {
+               while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
                        VM_PAGE_WAIT((void(*)()) 0);
                }
 
@@ -2268,7 +2289,6 @@ start_pass_1:
 
        /*
         * XXXO If there are no permanent objects in the destination,
-        * XXXO and the source and destination map entry caches match,
         * XXXO and the destination map entry is not shared,
         * XXXO then the map entries can be deleted and replaced
         * XXXO with those from the copy.  The following code is the
@@ -2278,8 +2298,7 @@ start_pass_1:
         * XXXO to the above pass and make sure that no wiring is involved.
         */
 /*
- *     if (!contains_permanent_objects &&
- *         copy->cpy_hdr.entries_pageable == dst_map->hdr.entries_pageable) {
+ *     if (!contains_permanent_objects) {
  *
  *              *
  *              *      Run over copy and adjust entries.  Steal code
@@ -2602,48 +2621,6 @@ kern_return_t vm_map_copyout(
        }
 
        /*
-        *      Since we're going to just drop the map
-        *      entries from the copy into the destination
-        *      map, they must come from the same pool.
-        */
-
-       if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
-           /*
-            * Mismatches occur when dealing with the default
-            * pager.
-            */
-           vm_map_entry_t      next, new;
-
-           entry = vm_map_copy_first_entry(copy);
-
-           /*
-            * Reinitialize the copy so that vm_map_copy_entry_link
-            * will work.
-            */
-           copy->cpy_hdr.nentries = 0;
-           copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
-           rbtree_init(&copy->cpy_hdr.tree);
-           rbtree_init(&copy->cpy_hdr.gap_tree);
-           vm_map_copy_first_entry(copy) =
-            vm_map_copy_last_entry(copy) =
-               vm_map_copy_to_entry(copy);
-
-           /*
-            * Copy each entry.
-            */
-           while (entry != vm_map_copy_to_entry(copy)) {
-               new = vm_map_copy_entry_create(copy);
-               vm_map_entry_copy_full(new, entry);
-               vm_map_copy_entry_link(copy,
-                               vm_map_copy_last_entry(copy),
-                               new);
-               next = entry->vme_next;
-               kmem_cache_free(&vm_map_entry_cache, (vm_offset_t) entry);
-               entry = next;
-           }
-       }
-
-       /*
         *      Adjust the addresses in the copy chain, and
         *      reset the region attributes.
         */
@@ -3198,7 +3175,6 @@ kern_return_t vm_map_copyin(
         vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
        copy->type = VM_MAP_COPY_ENTRY_LIST;
        copy->cpy_hdr.nentries = 0;
-       copy->cpy_hdr.entries_pageable = TRUE;
        rbtree_init(&copy->cpy_hdr.tree);
        rbtree_init(&copy->cpy_hdr.gap_tree);
 
@@ -3515,8 +3491,7 @@ kern_return_t vm_map_copyin_object(
        /*
         *      We drop the object into a special copy object
         *      that contains the object directly.  These copy objects
-        *      are distinguished by entries_pageable == FALSE
-        *      and null links.
+        *      are distinguished by null links.
         */
 
        copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
@@ -4156,8 +4131,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
 
        new_map = vm_map_create(new_pmap,
                        old_map->min_offset,
-                       old_map->max_offset,
-                       old_map->hdr.entries_pageable);
+                       old_map->max_offset);
 
        for (
            old_entry = vm_map_first_entry(old_map);
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 9e946c5..537c36e 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -153,8 +153,6 @@ struct vm_map_header {
        struct rbtree           gap_tree;       /* Sorted tree of gap lists
                                                   for allocations */
        int                     nentries;       /* Number of entries */
-       boolean_t               entries_pageable;
-                                               /* are map entries pageable? */
 };
 
 /*
@@ -354,13 +352,9 @@ MACRO_BEGIN                                        \
        (map)->timestamp = 0;                   \
 MACRO_END
 
-#define vm_map_lock(map)                       \
-MACRO_BEGIN                                    \
-       lock_write(&(map)->lock);               \
-       (map)->timestamp++;                     \
-MACRO_END
+void vm_map_lock(struct vm_map *map);
+void vm_map_unlock(struct vm_map *map);
 
-#define vm_map_unlock(map)     lock_write_done(&(map)->lock)
 #define vm_map_lock_read(map)  lock_read(&(map)->lock)
 #define vm_map_unlock_read(map)        lock_read_done(&(map)->lock)
 #define vm_map_lock_write_to_read(map) \
@@ -380,11 +374,9 @@ MACRO_END
 extern void            vm_map_init(void);
 
 /* Initialize an empty map */
-extern void            vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t,
-                                    boolean_t);
+extern void            vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t);
 /* Create an empty map */
-extern vm_map_t                vm_map_create(pmap_t, vm_offset_t, vm_offset_t,
-                                     boolean_t);
+extern vm_map_t                vm_map_create(pmap_t, vm_offset_t, vm_offset_t);
 /* Create a map in the image of an existing map */
 extern vm_map_t                vm_map_fork(vm_map_t);
 
diff --git a/vm/vm_page.c b/vm/vm_page.c
index a868fce..2a9f27b 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -27,10 +27,13 @@
  * multiprocessor systems. When a pool is empty and cannot provide a page,
  * it is filled by transferring multiple pages from the backend buddy system.
  * The symmetric case is handled likewise.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
  */
 
 #include <string.h>
 #include <kern/assert.h>
+#include <kern/counters.h>
 #include <kern/cpu_number.h>
 #include <kern/debug.h>
 #include <kern/list.h>
@@ -42,6 +45,9 @@
 #include <machine/pmap.h>
 #include <sys/types.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#define DEBUG 0
 
 #define __init
 #define __initdata
@@ -98,12 +104,96 @@ struct vm_page_free_list {
 };
 
 /*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process is dependent on the amount of physical memory, we scale
+ * the minimum number of free pages from it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM   5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM   6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM      10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM    100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM    1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM  3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+    struct list internal_pages;
+    struct list external_pages;
+};
+
+/*
  * Segment name buffer size.
  */
 #define VM_PAGE_NAME_SIZE 16
 
 /*
  * Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since one or both of the
+ * page queues lock and the free page queue lock are held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
  */
 struct vm_page_seg {
     struct vm_page_cpu_pool cpu_pools[NCPUS];
@@ -115,6 +205,19 @@ struct vm_page_seg {
     simple_lock_data_t lock;
     struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
     unsigned long nr_free_pages;
+
+    /* Free memory thresholds */
+    unsigned long min_free_pages; /* Privileged allocations only */
+    unsigned long low_free_pages; /* Pageout daemon starts scanning */
+    unsigned long high_free_pages; /* Pageout daemon stops scanning,
+                                      unprivileged allocations resume */
+
+    /* Page cache related data */
+    struct vm_page_queue active_pages;
+    unsigned long nr_active_pages;
+    unsigned long high_active_pages;
+    struct vm_page_queue inactive_pages;
+    unsigned long nr_inactive_pages;
 };
 
 /*
@@ -123,6 +226,7 @@ struct vm_page_seg {
 struct vm_page_boot_seg {
     phys_addr_t start;
     phys_addr_t end;
+    boolean_t heap_present;
     phys_addr_t avail_start;
     phys_addr_t avail_end;
 };
@@ -157,6 +261,16 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
  */
 static unsigned int vm_page_segs_size __read_mostly;
 
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
 static void __init
 vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
 {
@@ -180,6 +294,40 @@ vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
         page[i].type = type;
 }
 
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+    return (page->object != NULL)
+           && (page->wire_count == 0)
+           && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+    /*
+     * This function is called on pages pulled from the page queues,
+     * implying they're pageable, which is why the wire count isn't
+     * checked here.
+     */
+
+    return !page->busy
+           && !page->wanted
+           && !page->absent
+           && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+    page->busy = TRUE;
+    pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+    if (!page->dirty) {
+        page->dirty = pmap_is_modified(page->phys_addr);
+    }
+}
+
 static void __init
 vm_page_free_list_init(struct vm_page_free_list *free_list)
 {
@@ -216,6 +364,19 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
 
     assert(order < VM_PAGE_NR_FREE_LISTS);
 
+    if (vm_page_alloc_paused && current_thread()
+        && !current_thread()->vm_privilege) {
+        return NULL;
+    } else if (seg->nr_free_pages <= seg->low_free_pages) {
+        vm_pageout_start();
+
+        if ((seg->nr_free_pages <= seg->min_free_pages)
+            && current_thread() && !current_thread()->vm_privilege) {
+            vm_page_alloc_paused = TRUE;
+            return NULL;
+        }
+    }
+
     for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
         free_list = &seg->free_lists[i];
 
@@ -238,6 +399,11 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
     }
 
     seg->nr_free_pages -= (1 << order);
+
+    if (seg->nr_free_pages < seg->min_free_pages) {
+        vm_page_alloc_paused = TRUE;
+    }
+
     return page;
 }
 
@@ -361,6 +527,65 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
     simple_unlock(&seg->lock);
 }
 
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+    list_init(&queue->internal_pages);
+    list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+    if (page->external) {
+        list_insert_tail(&queue->external_pages, &page->node);
+    } else {
+        list_insert_tail(&queue->internal_pages, &page->node);
+    }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+    (void)queue;
+    list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+    struct vm_page *page;
+
+    if (!list_empty(&queue->external_pages)) {
+        page = list_first_entry(&queue->external_pages, struct vm_page, node);
+        return page;
+    }
+
+    if (!external_only && !list_empty(&queue->internal_pages)) {
+        page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+        return page;
+    }
+
+    return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+    assert(index < vm_page_segs_size);
+    return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+    unsigned int index;
+
+    index = seg - vm_page_segs;
+    assert(index < vm_page_segs_size);
+    return index;
+}
+
 static phys_addr_t __init
 vm_page_seg_size(struct vm_page_seg *seg)
 {
@@ -383,6 +608,39 @@ vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
 }
 
 static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+    unsigned long nr_pages;
+
+    nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+    if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+        panic("vm_page: segment too small");
+    }
+
+    seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+                          / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+    if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+        seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+    }
+
+    seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+                          / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+    if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+        seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+    }
+
+    seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+                           / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+    if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+        seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+    }
+}
+
+static void __init
 vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
                  struct vm_page *pages)
 {
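
[As a worked example of the thresholds computed above, assuming 4 KiB pages and a 1 GiB segment (262144 pages), vm_page_seg_compute_pageout_thresholds yields:

    min_free_pages  = 262144 *  5 / 100 = 13107   (above the 500-page floor)
    low_free_pages  = 262144 *  6 / 100 = 15728   (above the 600-page floor)
    high_free_pages = 262144 * 10 / 100 = 26214   (above the 1000-page floor)

so unprivileged allocations are refused below 13107 free pages, pageout starts at or below 15728, and stops once free pages climb back to 26214.]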
@@ -405,7 +663,15 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
         vm_page_free_list_init(&seg->free_lists[i]);
 
     seg->nr_free_pages = 0;
-    i = seg - vm_page_segs;
+
+    vm_page_seg_compute_pageout_thresholds(seg);
+
+    vm_page_queue_init(&seg->active_pages);
+    seg->nr_active_pages = 0;
+    vm_page_queue_init(&seg->inactive_pages);
+    seg->nr_inactive_pages = 0;
+
+    i = vm_page_seg_index(seg);
 
     for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
         vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
@@ -482,115 +748,637 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
     }
 }
 
-void __init
-vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
-             phys_addr_t avail_start, phys_addr_t avail_end)
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
 {
-    struct vm_page_boot_seg *seg;
+    assert(page->object != NULL);
+    assert(page->seg_index == vm_page_seg_index(seg));
+    assert(page->type != VM_PT_FREE);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+    assert(!page->free && !page->active && !page->inactive);
+    page->active = TRUE;
+    page->reference = TRUE;
+    vm_page_queue_push(&seg->active_pages, page);
+    seg->nr_active_pages++;
+    vm_page_active_count++;
+}
 
-    assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
-    assert(vm_page_aligned(start));
-    assert(vm_page_aligned(end));
-    assert(vm_page_aligned(avail_start));
-    assert(vm_page_aligned(avail_end));
-    assert(start < end);
-    assert(start <= avail_start);
-    assert(avail_end <= end);
-    assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+    assert(page->object != NULL);
+    assert(page->seg_index == vm_page_seg_index(seg));
+    assert(page->type != VM_PT_FREE);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+    assert(!page->free && page->active && !page->inactive);
+    page->active = FALSE;
+    vm_page_queue_remove(&seg->active_pages, page);
+    seg->nr_active_pages--;
+    vm_page_active_count--;
+}
 
-    seg = &vm_page_boot_segs[seg_index];
-    seg->start = start;
-    seg->end = end;
-    seg->avail_start = avail_start;
-    seg->avail_end = avail_end;
-    vm_page_segs_size++;
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+    assert(page->object != NULL);
+    assert(page->seg_index == vm_page_seg_index(seg));
+    assert(page->type != VM_PT_FREE);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+    assert(!page->free && !page->active && !page->inactive);
+    page->inactive = TRUE;
+    vm_page_queue_push(&seg->inactive_pages, page);
+    seg->nr_inactive_pages++;
+    vm_page_inactive_count++;
 }
 
-int
-vm_page_ready(void)
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
 {
-    return vm_page_is_ready;
+    assert(page->object != NULL);
+    assert(page->seg_index == vm_page_seg_index(seg));
+    assert(page->type != VM_PT_FREE);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+    assert(!page->free && !page->active && page->inactive);
+    page->inactive = FALSE;
+    vm_page_queue_remove(&seg->inactive_pages, page);
+    seg->nr_inactive_pages--;
+    vm_page_inactive_count--;
 }
 
-static unsigned int
-vm_page_select_alloc_seg(unsigned int selector)
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
 {
-    unsigned int seg_index;
+    struct vm_page *page, *first;
+    boolean_t locked;
 
-    switch (selector) {
-    case VM_PAGE_SEL_DMA:
-        seg_index = VM_PAGE_SEG_DMA;
-        break;
-    case VM_PAGE_SEL_DMA32:
-        seg_index = VM_PAGE_SEG_DMA32;
-        break;
-    case VM_PAGE_SEL_DIRECTMAP:
-        seg_index = VM_PAGE_SEG_DIRECTMAP;
-        break;
-    case VM_PAGE_SEL_HIGHMEM:
-        seg_index = VM_PAGE_SEG_HIGHMEM;
-        break;
-    default:
-        panic("vm_page: invalid selector");
+    first = NULL;
+
+    for (;;) {
+        page = vm_page_queue_first(&seg->active_pages, external_only);
+
+        if (page == NULL) {
+            break;
+        } else if (first == NULL) {
+            first = page;
+        } else if (first == page) {
+            break;
+        }
+
+        vm_page_seg_remove_active_page(seg, page);
+        locked = vm_object_lock_try(page->object);
+
+        if (!locked) {
+            vm_page_seg_add_active_page(seg, page);
+            continue;
+        }
+
+        if (!vm_page_can_move(page)) {
+            vm_page_seg_add_active_page(seg, page);
+            vm_object_unlock(page->object);
+            continue;
+        }
+
+        return page;
     }
 
-    return MIN(vm_page_segs_size - 1, seg_index);
+    return NULL;
 }
 
-static int __init
-vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
 {
-    return (seg->end != 0);
+    struct vm_page *page, *first;
+    boolean_t locked;
+
+    first = NULL;
+
+    for (;;) {
+        page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+        if (page == NULL) {
+            break;
+        } else if (first == NULL) {
+            first = page;
+        } else if (first == page) {
+            break;
+        }
+
+        vm_page_seg_remove_inactive_page(seg, page);
+        locked = vm_object_lock_try(page->object);
+
+        if (!locked) {
+            vm_page_seg_add_inactive_page(seg, page);
+            continue;
+        }
+
+        if (!vm_page_can_move(page)) {
+            vm_page_seg_add_inactive_page(seg, page);
+            vm_object_unlock(page->object);
+            continue;
+        }
+
+        return page;
+    }
+
+    return NULL;
 }
 
-static void __init
-vm_page_check_boot_segs(void)
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+                            boolean_t external_only,
+                            boolean_t *was_active)
 {
-    unsigned int i;
-    int expect_loaded;
+    struct vm_page *page;
 
-    if (vm_page_segs_size == 0)
-        panic("vm_page: no physical memory loaded");
+    page = vm_page_seg_pull_inactive_page(seg, external_only);
 
-    for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
-        expect_loaded = (i < vm_page_segs_size);
+    if (page != NULL) {
+        *was_active = FALSE;
+        return page;
+    }
 
-        if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
-            continue;
+    page = vm_page_seg_pull_active_page(seg, external_only);
 
-        panic("vm_page: invalid boot segment table");
+    if (page != NULL) {
+        *was_active = TRUE;
+        return page;
     }
+
+    return NULL;
 }
 
-static phys_addr_t __init
-vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+static boolean_t
+vm_page_seg_min_page_available(const struct vm_page_seg *seg)
 {
-    return seg->end - seg->start;
+    return (seg->nr_free_pages > seg->min_free_pages);
 }
 
-static phys_addr_t __init
-vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
 {
-    return seg->avail_end - seg->avail_start;
+    return (seg->nr_free_pages > seg->high_free_pages);
 }
 
-unsigned long __init
-vm_page_bootalloc(size_t size)
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
 {
-    struct vm_page_boot_seg *seg;
-    phys_addr_t pa;
-    unsigned int i;
+    return (seg->nr_free_pages >= seg->high_free_pages);
+}
 
-    for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
-         i < vm_page_segs_size;
-         i--) {
-        seg = &vm_page_boot_segs[i];
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+    assert(seg1 != seg2);
 
-        if (size <= vm_page_boot_seg_avail_size(seg)) {
-            pa = seg->avail_start;
-            seg->avail_start += vm_page_round(size);
-            return pa;
-        }
+    if (seg1 < seg2) {
+        simple_lock(&seg1->lock);
+        simple_lock(&seg2->lock);
+    } else {
+        simple_lock(&seg2->lock);
+        simple_lock(&seg1->lock);
+    }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+    simple_unlock(&seg1->lock);
+    simple_unlock(&seg2->lock);
+}
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+                         struct vm_page_seg *remote_seg)
+{
+    struct vm_page *src, *dest;
+    vm_object_t object;
+    vm_offset_t offset;
+    boolean_t was_active;
+
+    vm_page_lock_queues();
+    simple_lock(&vm_page_queue_free_lock);
+    vm_page_seg_double_lock(seg, remote_seg);
+
+    if (vm_page_seg_usable(seg)
+        || !vm_page_seg_page_available(remote_seg)) {
+        goto error;
+    }
+
+    src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+    if (src == NULL) {
+        goto error;
+    }
+
+    assert(src->object != NULL);
+    assert(!src->fictitious && !src->private);
+    assert(src->wire_count == 0);
+    assert(src->type != VM_PT_FREE);
+    assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+    dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+    assert(dest != NULL);
+
+    vm_page_seg_double_unlock(seg, remote_seg);
+    simple_unlock(&vm_page_queue_free_lock);
+
+    if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+        src->reference = TRUE;
+    }
+
+    object = src->object;
+    offset = src->offset;
+    vm_page_remove(src);
+
+    vm_page_remove_mappings(src);
+
+    vm_page_set_type(dest, 0, src->type);
+    memcpy(&dest->vm_page_header, &src->vm_page_header,
+           sizeof(*dest) - VM_PAGE_HEADER_SIZE);
+    vm_page_copy(src, dest);
+
+    if (!src->dirty) {
+        pmap_clear_modify(dest->phys_addr);
+    }
+
+    dest->busy = FALSE;
+
+    simple_lock(&vm_page_queue_free_lock);
+    vm_page_init(src);
+    src->free = TRUE;
+    simple_lock(&seg->lock);
+    vm_page_set_type(src, 0, VM_PT_FREE);
+    vm_page_seg_free_to_buddy(seg, src, 0);
+    simple_unlock(&seg->lock);
+    simple_unlock(&vm_page_queue_free_lock);
+
+    vm_page_insert(dest, object, offset);
+    vm_object_unlock(object);
+
+    if (was_active) {
+        vm_page_activate(dest);
+    } else {
+        vm_page_deactivate(dest);
+    }
+
+    vm_page_unlock_queues();
+
+    return TRUE;
+
+error:
+    vm_page_seg_double_unlock(seg, remote_seg);
+    simple_unlock(&vm_page_queue_free_lock);
+    vm_page_unlock_queues();
+    return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+    struct vm_page_seg *remote_seg;
+    unsigned int i;
+    boolean_t balanced;
+
+    /*
+     * It's important here that pages are moved to lower priority
+     * segments first.
+     */
+
+    for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+        remote_seg = vm_page_seg_get(i);
+
+        if (remote_seg == seg) {
+            continue;
+        }
+
+        balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+        if (balanced) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg,
+                  boolean_t external_only, boolean_t low_memory)
+{
+    struct vm_page *page;
+    boolean_t reclaim, laundry;
+    vm_object_t object;
+    boolean_t was_active;
+
+    page = NULL;
+    object = NULL;
+
+restart:
+    vm_page_lock_queues();
+    simple_lock(&seg->lock);
+
+    if (page != NULL) {
+        vm_object_lock(page->object);
+    } else {
+        page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+        if (page == NULL) {
+            goto out;
+        }
+    }
+
+    assert(page->object != NULL);
+    assert(!page->fictitious && !page->private);
+    assert(page->wire_count == 0);
+    assert(page->type != VM_PT_FREE);
+    assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+    object = page->object;
+
+    if (!was_active
+        && (page->reference || pmap_is_referenced(page->phys_addr))) {
+        vm_page_seg_add_active_page(seg, page);
+        simple_unlock(&seg->lock);
+        vm_object_unlock(object);
+        vm_stat.reactivations++;
+        current_task()->reactivations++;
+        vm_page_unlock_queues();
+        page = NULL;
+        goto restart;
+    }
+
+    vm_page_remove_mappings(page);
+
+    if (!page->dirty && !page->precious) {
+        reclaim = TRUE;
+        goto out;
+    }
+
+    reclaim = FALSE;
+
+    /*
+     * If we are very low on memory, then we can't rely on an external
+     * pager to clean a dirty page, because external pagers are not
+     * vm-privileged.
+     *
+     * The laundry bit tells vm_pageout_setup not to do any special
+     * processing of this page since it's immediately going to be
+     * double paged out to the default pager. The laundry bit is
+     * reset and the page is inserted into an internal object by
+     * vm_pageout_setup before the double paging pass.
+     */
+
+    assert(!page->laundry);
+
+    if (object->internal || !low_memory) {
+        laundry = FALSE;
+    } else {
+        laundry = page->laundry = TRUE;
+    }
+
+out:
+    simple_unlock(&seg->lock);
+
+    if (object == NULL) {
+        vm_page_unlock_queues();
+        return FALSE;
+    }
+
+    if (reclaim) {
+        vm_page_free(page);
+        vm_page_unlock_queues();
+
+        if (vm_object_collectable(object)) {
+            vm_object_collect(object);
+        } else {
+            vm_object_unlock(object);
+        }
+
+        return TRUE;
+    }
+
+    vm_page_unlock_queues();
+
+    /*
+     * If there is no memory object for the page, create one and hand it
+     * to the default pager. First try to collapse, so we don't create
+     * one unnecessarily.
+     */
+
+    if (!object->pager_initialized) {
+        vm_object_collapse(object);
+    }
+
+    if (!object->pager_initialized) {
+        vm_object_pager_create(object);
+    }
+
+    if (!object->pager_initialized) {
+        panic("vm_page_seg_evict");
+    }
+
+    vm_pageout_page(page, FALSE, TRUE); /* flush it */
+    vm_object_unlock(object);
+
+    if (laundry) {
+        goto restart;
+    }
+
+    return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+    unsigned long nr_pages;
+
+    nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+    seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+                             / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+    struct vm_page *page;
+
+    simple_lock(&seg->lock);
+
+    vm_page_seg_compute_high_active_page(seg);
+
+    while (seg->nr_active_pages > seg->high_active_pages) {
+        page = vm_page_seg_pull_active_page(seg, FALSE);
+
+        if (page == NULL) {
+            break;
+        }
+
+        page->reference = FALSE;
+        pmap_clear_reference(page->phys_addr);
+        vm_page_seg_add_inactive_page(seg, page);
+        vm_object_unlock(page->object);
+    }
+
+    simple_unlock(&seg->lock);
+}
+
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+    struct vm_page_boot_seg *seg;
+
+    assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+    assert(vm_page_aligned(start));
+    assert(vm_page_aligned(end));
+    assert(start < end);
+    assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+    seg = &vm_page_boot_segs[seg_index];
+    seg->start = start;
+    seg->end = end;
+    seg->heap_present = FALSE;
+
+#if DEBUG
+    printf("vm_page: load: %s: %llx:%llx\n",
+           vm_page_seg_name(seg_index),
+           (unsigned long long)start, (unsigned long long)end);
+#endif
+
+    vm_page_segs_size++;
+}
+
+void
+vm_page_load_heap(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+    struct vm_page_boot_seg *seg;
+
+    assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+    assert(vm_page_aligned(start));
+    assert(vm_page_aligned(end));
+
+    seg = &vm_page_boot_segs[seg_index];
+
+    assert(seg->start <= start);
+    assert(end <= seg->end);
+
+    seg->avail_start = start;
+    seg->avail_end = end;
+    seg->heap_present = TRUE;
+
+#if DEBUG
+    printf("vm_page: heap: %s: %llx:%llx\n",
+           vm_page_seg_name(seg_index),
+           (unsigned long long)start, (unsigned long long)end);
+#endif
+}
+
+int
+vm_page_ready(void)
+{
+    return vm_page_is_ready;
+}
+
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+    unsigned int seg_index;
+
+    switch (selector) {
+    case VM_PAGE_SEL_DMA:
+        seg_index = VM_PAGE_SEG_DMA;
+        break;
+    case VM_PAGE_SEL_DMA32:
+        seg_index = VM_PAGE_SEG_DMA32;
+        break;
+    case VM_PAGE_SEL_DIRECTMAP:
+        seg_index = VM_PAGE_SEG_DIRECTMAP;
+        break;
+    case VM_PAGE_SEL_HIGHMEM:
+        seg_index = VM_PAGE_SEG_HIGHMEM;
+        break;
+    default:
+        panic("vm_page: invalid selector");
+    }
+
+    return MIN(vm_page_segs_size - 1, seg_index);
+}
+
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+    return (seg->end != 0);
+}
+
+static void __init
+vm_page_check_boot_segs(void)
+{
+    unsigned int i;
+    int expect_loaded;
+
+    if (vm_page_segs_size == 0)
+        panic("vm_page: no physical memory loaded");
+
+    for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+        expect_loaded = (i < vm_page_segs_size);
+
+        if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
+            continue;
+
+        panic("vm_page: invalid boot segment table");
+    }
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+    return seg->end - seg->start;
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+    return seg->avail_end - seg->avail_start;
+}
+
+unsigned long __init
+vm_page_bootalloc(size_t size)
+{
+    struct vm_page_boot_seg *seg;
+    phys_addr_t pa;
+    unsigned int i;
+
+    for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+         i < vm_page_segs_size;
+         i--) {
+        seg = &vm_page_boot_segs[i];
+
+        if (size <= vm_page_boot_seg_avail_size(seg)) {
+            pa = seg->avail_start;
+            seg->avail_start += vm_page_round(size);
+            return pa;
+        }
     }
 
     panic("vm_page: no physical memory available");
@@ -683,21 +1471,92 @@ vm_page_lookup_pa(phys_addr_t pa)
     return NULL;
 }
 
-struct vm_page *
-vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
 {
-    struct vm_page *page;
+    struct vm_page_seg *seg;
     unsigned int i;
 
-    for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
-        page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = &vm_page_segs[i];
 
-        if (page != NULL)
-            return page;
+        if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+            return seg;
+        }
     }
 
-    if (type == VM_PT_PMAP)
-        panic("vm_page: unable to allocate pmap page");
+    return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+    if (page->fictitious) {
+        if (page->private) {
+            panic("vm_page: page both fictitious and private");
+        }
+
+        if (page->phys_addr != vm_page_fictitious_addr) {
+            panic("vm_page: invalid fictitious page");
+        }
+    } else {
+        struct vm_page_seg *seg;
+
+        if (page->phys_addr == vm_page_fictitious_addr) {
+            panic("vm_page: real page has fictitious address");
+        }
+
+        seg = vm_page_lookup_seg(page);
+
+        if (seg == NULL) {
+            if (!page->private) {
+                panic("vm_page: page claims it's managed but not in any segment");
+            }
+        } else {
+            if (page->private) {
+                struct vm_page *real_page;
+
+                if (vm_page_pageable(page)) {
+                    panic("vm_page: private page is pageable");
+                }
+
+                real_page = vm_page_lookup_pa(page->phys_addr);
+
+                if (vm_page_pageable(real_page)) {
+                    panic("vm_page: page underlying private page is pageable");
+                }
+
+                if ((real_page->type == VM_PT_FREE)
+                    || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+                    panic("vm_page: page underlying private pagei is free");
+                }
+            } else {
+                unsigned int index;
+
+                index = vm_page_seg_index(seg);
+
+                if (index != page->seg_index) {
+                    panic("vm_page: page segment mismatch");
+                }
+            }
+        }
+    }
+}
+
+struct vm_page *
+vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+{
+    struct vm_page *page;
+    unsigned int i;
+
+    for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+        page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+        if (page != NULL)
+            return page;
+    }
+
+    if (!current_thread() || current_thread()->vm_privilege)
+        panic("vm_page: privileged thread unable to allocate page");
 
     return NULL;
 }
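
The segment lookup just added (vm_page_lookup_seg) and the table indexing introduced further down in this series of hunks (vm_page_table_index) both rely on plain range checks against the loaded segments. Below is a hedged, self-contained model of that scheme; the segment boundaries and the toy_* identifiers are assumptions made up for the example, not the kernel's data.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_PAGE_SHIFT 12

    struct toy_seg {
        uint64_t start;
        uint64_t end;
    };

    static const struct toy_seg toy_segs[] = {
        { 0x00000000, 0x01000000 },           /* hypothetical DMA segment */
        { 0x01000000, 0x38000000 },           /* hypothetical DIRECTMAP segment */
        { 0x38000000, 0x80000000 },           /* hypothetical HIGHMEM segment */
    };

    #define TOY_NR_SEGS (sizeof(toy_segs) / sizeof(toy_segs[0]))

    /* Return the global index of the page descriptor covering pa. */
    static unsigned long toy_table_index(uint64_t pa)
    {
        unsigned long index = 0;

        for (unsigned int i = 0; i < TOY_NR_SEGS; i++) {
            const struct toy_seg *seg = &toy_segs[i];

            if ((pa >= seg->start) && (pa < seg->end)) {
                return index + (unsigned long)((pa - seg->start) >> TOY_PAGE_SHIFT);
            }

            /* Skip the descriptors used by this whole segment. */
            index += (unsigned long)((seg->end - seg->start) >> TOY_PAGE_SHIFT);
        }

        fprintf(stderr, "toy_table_index: invalid physical address\n");
        abort();
    }

    int main(void)
    {
        printf("index of 0x01004000: %lu\n", toy_table_index(0x01004000));
        return 0;
    }
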
@@ -740,10 +1599,75 @@ vm_page_info_all(void)
         printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
                vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
                seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+        printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+               vm_page_seg_name(vm_page_seg_index(seg)),
+               seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
     }
 }
 
 phys_addr_t
+vm_page_seg_end(unsigned int selector)
+{
+    return vm_page_segs[vm_page_select_alloc_seg(selector)].end;
+}
+
+static unsigned long
+vm_page_boot_table_size(void)
+{
+    unsigned long nr_pages;
+    unsigned int i;
+
+    nr_pages = 0;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+    }
+
+    return nr_pages;
+}
+
+unsigned long
+vm_page_table_size(void)
+{
+    unsigned long nr_pages;
+    unsigned int i;
+
+    if (!vm_page_is_ready) {
+        return vm_page_boot_table_size();
+    }
+
+    nr_pages = 0;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        nr_pages += vm_page_atop(vm_page_seg_size(&vm_page_segs[i]));
+    }
+
+    return nr_pages;
+}
+
+unsigned long
+vm_page_table_index(phys_addr_t pa)
+{
+    struct vm_page_seg *seg;
+    unsigned long index;
+    unsigned int i;
+
+    index = 0;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = &vm_page_segs[i];
+
+        if ((pa >= seg->start) && (pa < seg->end)) {
+            return index + vm_page_atop(pa - seg->start);
+        }
+
+        index += vm_page_atop(vm_page_seg_size(seg));
+    }
+
+    panic("vm_page: invalid physical address");
+}
+
+phys_addr_t
 vm_page_mem_size(void)
 {
     phys_addr_t total;
@@ -752,10 +1676,6 @@ vm_page_mem_size(void)
     total = 0;
 
     for (i = 0; i < vm_page_segs_size; i++) {
-        /* XXX */
-        if (i > VM_PAGE_SEG_DIRECTMAP)
-            continue;
-
         total += vm_page_seg_size(&vm_page_segs[i]);
     }
 
@@ -771,12 +1691,413 @@ vm_page_mem_free(void)
     total = 0;
 
     for (i = 0; i < vm_page_segs_size; i++) {
-        /* XXX */
-        if (i >  VM_PAGE_SEG_DIRECTMAP)
-            continue;
-
         total += vm_page_segs[i].nr_free_pages;
     }
 
     return total;
 }
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+    VM_PAGE_CHECK(page);
+
+    if (page->wire_count == 0) {
+        vm_page_queues_remove(page);
+
+        if (!page->private && !page->fictitious) {
+            vm_page_wire_count++;
+        }
+    }
+
+    page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+    struct vm_page_seg *seg;
+
+    VM_PAGE_CHECK(page);
+
+    assert(page->wire_count != 0);
+    page->wire_count--;
+
+    if ((page->wire_count != 0)
+        || page->fictitious
+        || page->private) {
+        return;
+    }
+
+    seg = vm_page_seg_get(page->seg_index);
+
+    simple_lock(&seg->lock);
+    vm_page_seg_add_active_page(seg, page);
+    simple_unlock(&seg->lock);
+
+    vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+    struct vm_page_seg *seg;
+
+    VM_PAGE_CHECK(page);
+
+    /*
+     * This page is no longer very interesting.  If it was
+     * interesting (active or inactive/referenced), then we
+     * clear the reference bit and (re)enter it in the
+     * inactive queue.  Note wired pages should not have
+     * their reference bit cleared.
+     */
+
+    if (page->active || (page->inactive && page->reference)) {
+        if (!page->fictitious && !page->private && !page->absent) {
+            pmap_clear_reference(page->phys_addr);
+        }
+
+        page->reference = FALSE;
+        vm_page_queues_remove(page);
+    }
+
+    if ((page->wire_count == 0) && !page->fictitious
+        && !page->private && !page->inactive) {
+        seg = vm_page_seg_get(page->seg_index);
+
+        simple_lock(&seg->lock);
+        vm_page_seg_add_inactive_page(seg, page);
+        simple_unlock(&seg->lock);
+    }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+    struct vm_page_seg *seg;
+
+    VM_PAGE_CHECK(page);
+
+    /*
+     * Unconditionally remove so that, even if the page was already
+     * active, it gets back to the end of the active queue.
+     */
+    vm_page_queues_remove(page);
+
+    if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+        seg = vm_page_seg_get(page->seg_index);
+
+        if (page->active)
+            panic("vm_page_activate: already active");
+
+        simple_lock(&seg->lock);
+        vm_page_seg_add_active_page(seg, page);
+        simple_unlock(&seg->lock);
+    }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+    struct vm_page_seg *seg;
+
+    assert(!page->active || !page->inactive);
+
+    if (!page->active && !page->inactive) {
+        return;
+    }
+
+    seg = vm_page_seg_get(page->seg_index);
+
+    simple_lock(&seg->lock);
+
+    if (page->active) {
+        vm_page_seg_remove_active_page(seg, page);
+    } else {
+        vm_page_seg_remove_inactive_page(seg, page);
+    }
+
+    simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+    struct vm_page_seg *seg;
+    boolean_t usable;
+    unsigned int i;
+
+    simple_lock(&vm_page_queue_free_lock);
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = vm_page_seg_get(i);
+
+        simple_lock(&seg->lock);
+        usable = vm_page_seg_usable(seg);
+        simple_unlock(&seg->lock);
+
+        if (!usable) {
+            return FALSE;
+        }
+    }
+
+    vm_page_external_pagedout = -1;
+    vm_page_alloc_paused = FALSE;
+    thread_wakeup(&vm_page_alloc_paused);
+    return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+    struct vm_page_seg *seg;
+    boolean_t page_available;
+    unsigned int i;
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        seg = vm_page_seg_get(i);
+
+        simple_lock(&seg->lock);
+        page_available = vm_page_seg_page_available(seg);
+        simple_unlock(&seg->lock);
+
+        if (page_available) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+    boolean_t balanced;
+    unsigned int i;
+
+    /*
+     * It's important here that pages are moved from higher priority
+     * segments first.
+     */
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+        if (balanced) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+    boolean_t balanced;
+
+    while (vm_page_may_balance()) {
+        balanced = vm_page_balance_once();
+
+        if (!balanced) {
+            break;
+        }
+    }
+
+    return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only)
+{
+    struct vm_page_seg *seg;
+    boolean_t low_memory, min_page_available, evicted;
+    unsigned int i;
+
+    /*
+     * XXX Page allocation currently only uses the DIRECTMAP selector,
+     * allowing us to know which segments to look at when determining
+     * whether we're very low on memory.
+     */
+    low_memory = TRUE;
+
+    simple_lock(&vm_page_queue_free_lock);
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        if (i > VM_PAGE_SEG_DIRECTMAP) {
+            break;
+        }
+
+        seg = vm_page_seg_get(i);
+
+        simple_lock(&seg->lock);
+        min_page_available = vm_page_seg_min_page_available(seg);
+        simple_unlock(&seg->lock);
+
+        if (min_page_available) {
+            low_memory = FALSE;
+            break;
+        }
+    }
+
+    simple_unlock(&vm_page_queue_free_lock);
+
+    /*
+     * It's important here that pages are evicted from lower priority
+     * segments first.
+     */
+
+    for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+        evicted = vm_page_seg_evict(vm_page_seg_get(i),
+                                    external_only, low_memory);
+
+        if (evicted) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY   5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+    boolean_t pause, evicted, external_only;
+    unsigned int i;
+
+    *should_wait = TRUE;
+    external_only = TRUE;
+
+    simple_lock(&vm_page_queue_free_lock);
+    vm_page_external_pagedout = 0;
+    simple_unlock(&vm_page_queue_free_lock);
+
+again:
+    vm_page_lock_queues();
+    pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+    vm_page_unlock_queues();
+
+    if (pause) {
+        simple_lock(&vm_page_queue_free_lock);
+        return FALSE;
+    }
+
+    for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+        evicted = vm_page_evict_once(external_only);
+
+        if (!evicted) {
+            break;
+        }
+    }
+
+    simple_lock(&vm_page_queue_free_lock);
+
+    /*
+     * Keep in mind eviction may not cause pageouts, since non-precious
+     * clean pages are simply released.
+     */
+    if ((vm_page_external_pagedout == 0) || (vm_page_laundry_count == 0)) {
+        /*
+         * No pageout, but some clean pages were freed. Start a complete
+         * scan again without waiting.
+         */
+        if (evicted) {
+            *should_wait = FALSE;
+            return FALSE;
+        }
+
+        /*
+         * Eviction failed, consider pages from internal objects on the
+         * next attempt.
+         */
+        if (external_only) {
+            simple_unlock(&vm_page_queue_free_lock);
+            external_only = FALSE;
+            goto again;
+        }
+
+        /*
+         * TODO Find out what could cause this and how to deal with it.
+         * This will likely require an out-of-memory killer.
+         */
+        panic("vm_page: unable to recycle any page");
+    }
+
+    simple_unlock(&vm_page_queue_free_lock);
+
+    return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+    unsigned int i;
+
+    vm_page_lock_queues();
+
+    for (i = 0; i < vm_page_segs_size; i++) {
+        vm_page_seg_refill_inactive(vm_page_seg_get(i));
+    }
+
+    vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+    assert(!current_thread()->vm_privilege);
+
+    simple_lock(&vm_page_queue_free_lock);
+
+    if (!vm_page_alloc_paused) {
+        simple_unlock(&vm_page_queue_free_lock);
+        return;
+    }
+
+    assert_wait(&vm_page_alloc_paused, FALSE);
+
+    simple_unlock(&vm_page_queue_free_lock);
+
+    if (continuation != 0) {
+        counter(c_vm_page_wait_block_user++);
+        thread_block(continuation);
+    } else {
+        counter(c_vm_page_wait_block_kernel++);
+        thread_block((void (*)(void)) 0);
+    }
+}
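
To illustrate the new per-segment active/inactive handling (vm_page_seg_compute_high_active_page and vm_page_seg_refill_inactive above), here is a small user-space sketch. It keeps only a third of the pageable pages active; the real watermark comes from VM_PAGE_HIGH_ACTIVE_PAGE_NUM/DENOM, whose values are not shown in this patch, so the ratio below is purely illustrative, as are all toy_* names.

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_page {
        struct toy_page *next;
        bool referenced;
    };

    struct toy_seg {
        struct toy_page *active;              /* head of the active queue */
        struct toy_page *inactive;            /* head of the inactive queue */
        unsigned long nr_active;
        unsigned long nr_inactive;
        unsigned long high_active;
    };

    static struct toy_page *toy_pop(struct toy_page **head)
    {
        struct toy_page *page = *head;

        if (page != NULL)
            *head = page->next;

        return page;
    }

    static void toy_push(struct toy_page **head, struct toy_page *page)
    {
        page->next = *head;
        *head = page;
    }

    /* Keep roughly a third of the pageable pages active (arbitrary ratio). */
    static void toy_refill_inactive(struct toy_seg *seg)
    {
        seg->high_active = (seg->nr_active + seg->nr_inactive) / 3;

        while (seg->nr_active > seg->high_active) {
            struct toy_page *page = toy_pop(&seg->active);

            if (page == NULL)
                break;

            seg->nr_active--;
            page->referenced = false;         /* stands in for pmap_clear_reference */
            toy_push(&seg->inactive, page);
            seg->nr_inactive++;
        }
    }

    int main(void)
    {
        struct toy_seg seg = { 0 };
        struct toy_page pages[10];

        for (int i = 0; i < 10; i++) {
            pages[i].referenced = true;
            toy_push(&seg.active, &pages[i]);
            seg.nr_active++;
        }

        toy_refill_inactive(&seg);
        printf("active: %lu, inactive: %lu\n", seg.nr_active, seg.nr_inactive);
        return 0;
    }
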
diff --git a/vm/vm_page.h b/vm/vm_page.h
index f2e20a7..eb684c1 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -40,6 +40,7 @@
 #include <vm/vm_object.h>
 #include <vm/vm_types.h>
 #include <kern/queue.h>
+#include <kern/list.h>
 #include <kern/lock.h>
 #include <kern/log2.h>
 
@@ -77,8 +78,7 @@
  */
 
 struct vm_page {
-       /* Members used in the vm_page module only */
-       struct list node;
+       struct list node;               /* page queues or free list (P) */
        unsigned short type;
        unsigned short seg_index;
        unsigned short order;
@@ -90,15 +90,13 @@ struct vm_page {
         */
        phys_addr_t phys_addr;
 
+       queue_chain_t   listq;          /* all pages in same object (O) */
+       struct vm_page  *next;          /* VP bucket link (O) */
+
        /* We use an empty struct as the delimiter.  */
        struct {} vm_page_header;
 #define VM_PAGE_HEADER_SIZE    offsetof(struct vm_page, vm_page_header)
 
-       queue_chain_t   pageq;          /* queue info for FIFO
-                                        * queue or free list (P) */
-       queue_chain_t   listq;          /* all pages in same object (O) */
-       struct vm_page  *next;          /* VP bucket link (O) */
-
        vm_object_t     object;         /* which object am I in (O,P) */
        vm_offset_t     offset;         /* offset into that object (O,P) */
 
@@ -109,8 +107,7 @@ struct vm_page {
                        laundry:1,      /* page is being cleaned now (P)*/
                        free:1,         /* page is on free list (P) */
                        reference:1,    /* page has been used (P) */
-                       external:1,     /* page considered external (P) */
-                       extcounted:1,   /* page counted in ext counts (P) */
+                       external:1,     /* page in external object (P) */
                        busy:1,         /* page is in transit (O) */
                        wanted:1,       /* someone is waiting for page (O) */
                        tabled:1,       /* page is in VP table (O) */
@@ -137,7 +134,9 @@ struct vm_page {
  *     some useful check on a page structure.
  */
 
-#define VM_PAGE_CHECK(mem)
+#define VM_PAGE_CHECK(mem) vm_page_check(mem)
+
+void vm_page_check(const struct vm_page *page);
 
 /*
  *     Each pageable resident page falls into one of three lists:
@@ -156,13 +155,6 @@ struct vm_page {
  */
 
 extern
-vm_page_t      vm_page_queue_fictitious;       /* fictitious free queue */
-extern
-queue_head_t   vm_page_queue_active;   /* active memory queue */
-extern
-queue_head_t   vm_page_queue_inactive; /* inactive memory queue */
-
-extern
 int    vm_page_fictitious_count;/* How many fictitious pages are free? */
 extern
 int    vm_page_active_count;   /* How many pages are active? */
@@ -171,36 +163,16 @@ int       vm_page_inactive_count; /* How many pages are inactive? */
 extern
 int    vm_page_wire_count;     /* How many pages are wired? */
 extern
-int    vm_page_free_target;    /* How many do we want free? */
-extern
-int    vm_page_free_min;       /* When to wakeup pageout */
-extern
-int    vm_page_inactive_target;/* How many do we want inactive? */
-extern
-int    vm_page_free_reserved;  /* How many pages reserved to do pageout */
-extern
 int    vm_page_laundry_count;  /* How many pages being laundered? */
 extern
-int    vm_page_external_limit; /* Max number of pages for external objects  */
-
-/* Only objects marked with the extcounted bit are included in this total.
-   Pages which we scan for possible pageout, but which are not actually
-   dirty, don't get considered against the external page limits any more
-   in this way.  */
-extern
-int    vm_page_external_count; /* How many pages for external objects? */
-
-
+int    vm_page_external_pagedout;      /* How many external pages being paged out? */
 
 decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
                                                   page queues */
 decl_simple_lock_data(extern,vm_page_queue_free_lock)
                                                /* lock on free page queue */
 
-extern unsigned int    vm_page_free_wanted;
-                               /* how many threads are waiting for memory */
-
-extern vm_offset_t     vm_page_fictitious_addr;
+extern phys_addr_t     vm_page_fictitious_addr;
                                /* (fake) phys_addr of fictitious pages */
 
 extern void            vm_page_bootstrap(
@@ -212,9 +184,11 @@ extern vm_page_t   vm_page_lookup(
        vm_object_t     object,
        vm_offset_t     offset);
 extern vm_page_t       vm_page_grab_fictitious(void);
-extern boolean_t       vm_page_convert(vm_page_t *, boolean_t);
+extern boolean_t       vm_page_convert(vm_page_t *);
 extern void            vm_page_more_fictitious(void);
-extern vm_page_t       vm_page_grab(boolean_t);
+extern vm_page_t       vm_page_grab(void);
+extern void            vm_page_release(vm_page_t, boolean_t, boolean_t);
+extern phys_addr_t     vm_page_grab_phys_addr(void);
 extern vm_page_t       vm_page_grab_contig(vm_size_t, unsigned int);
 extern void            vm_page_free_contig(vm_page_t, vm_size_t);
 extern void            vm_page_wait(void (*)(void));
@@ -303,22 +277,7 @@ extern unsigned int        vm_page_info(
 #define vm_page_lock_queues()  simple_lock(&vm_page_queue_lock)
 #define vm_page_unlock_queues()        simple_unlock(&vm_page_queue_lock)
 
-#define VM_PAGE_QUEUES_REMOVE(mem)                             \
-       MACRO_BEGIN                                             \
-       if (mem->active) {                                      \
-               queue_remove(&vm_page_queue_active,             \
-                       mem, vm_page_t, pageq);                 \
-               mem->active = FALSE;                            \
-               vm_page_active_count--;                         \
-       }                                                       \
-                                                               \
-       if (mem->inactive) {                                    \
-               queue_remove(&vm_page_queue_inactive,           \
-                       mem, vm_page_t, pageq);                 \
-               mem->inactive = FALSE;                          \
-               vm_page_inactive_count--;                       \
-       }                                                       \
-       MACRO_END
+#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem)
 
 /*
  * Copyright (c) 2010-2014 Richard Braun.
@@ -367,18 +326,11 @@ extern unsigned int       vm_page_info(
 
 /*
  * Page usage types.
- *
- * Failing to allocate pmap pages will cause a kernel panic.
- * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
- * pages.
  */
 #define VM_PT_FREE          0   /* Page unused */
 #define VM_PT_RESERVED      1   /* Page reserved at boot time */
 #define VM_PT_TABLE         2   /* Page is part of the page table */
-#define VM_PT_PMAP          3   /* Page stores pmap-specific data */
-#define VM_PT_KMEM          4   /* Page is part of a kmem slab */
-#define VM_PT_STACK         5   /* Type for generic kernel allocations */
-#define VM_PT_KERNEL        6   /* Type for generic kernel allocations */
+#define VM_PT_KERNEL        3   /* Type for generic kernel allocations */
 
 static inline unsigned short
 vm_page_type(const struct vm_page *page)
@@ -401,29 +353,6 @@ vm_page_to_pa(const struct vm_page *page)
     return page->phys_addr;
 }
 
-#if 0
-static inline unsigned long
-vm_page_direct_va(phys_addr_t pa)
-{
-    assert(pa < VM_PAGE_DIRECTMAP_LIMIT);
-    return ((unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS);
-}
-
-static inline phys_addr_t
-vm_page_direct_pa(unsigned long va)
-{
-    assert(va >= VM_MIN_DIRECTMAP_ADDRESS);
-    assert(va < VM_MAX_DIRECTMAP_ADDRESS);
-    return (va - VM_MIN_DIRECTMAP_ADDRESS);
-}
-
-static inline void *
-vm_page_direct_ptr(const struct vm_page *page)
-{
-    return (void *)vm_page_direct_va(vm_page_to_pa(page));
-}
-#endif
-
 /*
  * Associate private data with a page.
  */
@@ -442,13 +371,18 @@ vm_page_get_priv(const struct vm_page *page)
 /*
  * Load physical memory into the vm_page module at boot time.
  *
- * The avail_start and avail_end parameters are used to maintain a simple
- * heap for bootstrap allocations.
- *
  * All addresses must be page-aligned. Segments can be loaded in any order.
  */
-void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
-                  phys_addr_t avail_start, phys_addr_t avail_end);
+void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end);
+
+/*
+ * Load available physical memory into the vm_page module at boot time.
+ *
+ * The segment referred to must have been loaded with vm_page_load
+ * before loading its heap.
+ */
+void vm_page_load_heap(unsigned int seg_index, phys_addr_t start,
+                       phys_addr_t end);
 
 /*
  * Return true if the vm_page module is completely initialized, false
@@ -521,6 +455,21 @@ const char * vm_page_seg_name(unsigned int seg_index);
 void vm_page_info_all(void);
 
 /*
+ * Return the maximum physical address for a given segment selector.
+ */
+phys_addr_t vm_page_seg_end(unsigned int selector);
+
+/*
+ * Return the total number of physical pages.
+ */
+unsigned long vm_page_table_size(void);
+
+/*
+ * Return the index of a page in the page table.
+ */
+unsigned long vm_page_table_index(phys_addr_t pa);
+
+/*
  * Return the total amount of physical memory.
  */
 phys_addr_t vm_page_mem_size(void);
@@ -533,4 +482,53 @@ phys_addr_t vm_page_mem_size(void);
  */
 unsigned long vm_page_mem_free(void);
 
+/*
+ * Remove the given page from any page queue it might be in.
+ */
+void vm_page_queues_remove(struct vm_page *page);
+
+/*
+ * Balance physical pages among segments.
+ *
+ * This function should be called first by the pageout daemon
+ * on memory pressure, since balancing alone may free enough pages,
+ * making it unnecessary to perform any other operation, let alone
+ * shrink caches.
+ *
+ * Return TRUE if balancing made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_balance(void);
+
+/*
+ * Evict physical pages.
+ *
+ * This function should be called by the pageout daemon after balancing
+ * the segments and shrinking kernel caches.
+ *
+ * Return TRUE if eviction made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * Otherwise, report whether the pageout daemon should wait (some pages
+ * have been paged out) or not (only clean pages have been released).
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_evict(boolean_t *should_wait);
+
+/*
+ * Turn active pages into inactive ones for second-chance LRU
+ * approximation.
+ *
+ * This function should be called by the pageout daemon on memory pressure,
+ * i.e. right before evicting pages.
+ *
+ * XXX This is probably not the best strategy, compared to keeping the
+ * active/inactive ratio in check at all times, but this means less
+ * frequent refills.
+ */
+void vm_page_refill_inactive(void);
+
 #endif /* _VM_VM_PAGE_H_ */
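
The header comments above prescribe an order for the pageout daemon: balance segments first, then shrink caches, refill the inactive queues, and finally evict. A toy harness along these lines is sketched below; the toy_* stubs merely stand in for the real calls (vm_page_balance, cache shrinking, vm_page_refill_inactive, vm_page_evict) and are not the kernel interfaces.

    #include <stdbool.h>
    #include <stdio.h>

    static bool toy_balance(void)
    {
        puts("1. balance segments (cheapest step, may already be enough)");
        return false;                         /* pretend balancing was not enough */
    }

    static void toy_shrink_caches(void)
    {
        puts("2. shrink kernel caches (slabs last)");
    }

    static void toy_refill_inactive(void)
    {
        puts("3. refill the inactive queues (second-chance LRU)");
    }

    static bool toy_evict(bool *should_wait)
    {
        puts("4. evict pages, lowest priority segments first");
        *should_wait = true;                  /* some pages were paged out */
        return false;
    }

    int main(void)
    {
        bool should_wait = false;

        if (!toy_balance()) {
            toy_shrink_caches();
            toy_refill_inactive();

            if (!toy_evict(&should_wait) && should_wait)
                puts("the daemon would now wait for pagers to catch up");
        }

        return 0;
    }
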
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index f420804..dd0f995 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,140 +53,17 @@
 #include <vm/vm_pageout.h>
 #include <machine/locore.h>
 
-
-
-#ifndef        VM_PAGEOUT_BURST_MAX
-#define        VM_PAGEOUT_BURST_MAX    10              /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef        VM_PAGEOUT_BURST_MIN
-#define        VM_PAGEOUT_BURST_MIN    5               /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef        VM_PAGEOUT_BURST_WAIT
-#define        VM_PAGEOUT_BURST_WAIT   10              /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef        VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT  75              /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef        VM_PAGEOUT_PAUSE_MAX
-#define        VM_PAGEOUT_PAUSE_MAX    10              /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
-/*
- *     To obtain a reasonable LRU approximation, the inactive queue
- *     needs to be large enough to give pages on it a chance to be
- *     referenced a second time.  This macro defines the fraction
- *     of active+inactive pages that should be inactive.
- *     The pageout daemon uses it to update vm_page_inactive_target.
- *
- *     If the number of free pages falls below vm_page_free_target and
- *     vm_page_inactive_count is below vm_page_inactive_target,
- *     then the pageout daemon starts running.
- */
-
-#ifndef        VM_PAGE_INACTIVE_TARGET
-#define        VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
-
 /*
- *     Once the pageout daemon starts running, it keeps going
- *     until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
  */
-
-#ifndef        VM_PAGE_FREE_TARGET
-#define        VM_PAGE_FREE_TARGET(free)       (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
+static int vm_pageout_requested;
 
 /*
- *     The pageout daemon always starts running once the number of free pages
- *     falls below vm_page_free_min.
- */
-
-#ifndef        VM_PAGE_FREE_MIN
-#define        VM_PAGE_FREE_MIN(free)  (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/*      When vm_page_external_count exceeds vm_page_external_limit, 
- *     allocations of externally paged pages stops.
- */
-
-#ifndef VM_PAGE_EXTERNAL_LIMIT
-#define VM_PAGE_EXTERNAL_LIMIT(free)           ((free) / 2)
-#endif  /* VM_PAGE_EXTERNAL_LIMIT */
-
-/*     Attempt to keep the number of externally paged pages less
- *     than vm_pages_external_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
  */
-#ifndef VM_PAGE_EXTERNAL_TARGET
-#define VM_PAGE_EXTERNAL_TARGET(free)          ((free) / 4)
-#endif  /* VM_PAGE_EXTERNAL_TARGET */
-
-/*
- *     When the number of free pages falls below vm_page_free_reserved,
- *     only vm-privileged threads can allocate pages.  vm-privilege
- *     allows the pageout daemon and default pager (and any other
- *     associated threads needed for default pageout) to continue
- *     operation by dipping into the reserved pool of pages.  */
-
-#ifndef        VM_PAGE_FREE_RESERVED
-#define        VM_PAGE_FREE_RESERVED                   500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- *     When the number of free pages falls below vm_pageout_reserved_internal,
- *     the pageout daemon no longer trusts external pagers to clean pages.
- *     External pagers are probably all wedged waiting for a free page.
- *     It forcibly double-pages dirty pages belonging to external objects,
- *     getting the pages to the default pager to clean.
- */
-
-#ifndef        VM_PAGEOUT_RESERVED_INTERNAL
-#define        VM_PAGEOUT_RESERVED_INTERNAL(reserve)   ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- *     When the number of free pages falls below vm_pageout_reserved_really,
- *     the pageout daemon stops work entirely to let the default pager
- *     catch up (assuming the default pager has pages to clean).
- *     Beyond this point, it is too dangerous to consume memory
- *     even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef        VM_PAGEOUT_RESERVED_REALLY
-#define        VM_PAGEOUT_RESERVED_REALLY(reserve)     ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_page_external_target = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0;                /* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0;                /* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- *     These variables record the pageout daemon's actions:
- *     how many pages it looks at and what happens to those pages.
- *     No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0;            /* debugging */
-unsigned int vm_pageout_inactive = 0;          /* debugging */
-unsigned int vm_pageout_inactive_nolock = 0;   /* debugging */
-unsigned int vm_pageout_inactive_busy = 0;     /* debugging */
-unsigned int vm_pageout_inactive_absent = 0;   /* debugging */
-unsigned int vm_pageout_inactive_used = 0;     /* debugging */
-unsigned int vm_pageout_inactive_clean = 0;    /* debugging */
-unsigned int vm_pageout_inactive_dirty = 0;    /* debugging */
-unsigned int vm_pageout_inactive_double = 0;   /* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
 
 /*
  *     Routine:        vm_pageout_setup
@@ -241,15 +118,20 @@ vm_pageout_setup(
 
        /*
         *      If we are not flushing the page, allocate a
-        *      page in the object.  If we cannot get the
-        *      page, flush instead.
+        *      page in the object.
         */
        if (!flush) {
-               vm_object_lock(new_object);
-               new_m = vm_page_alloc(new_object, new_offset);
-               if (new_m == VM_PAGE_NULL)
-                       flush = TRUE;
-               vm_object_unlock(new_object);
+               for (;;) {
+                       vm_object_lock(new_object);
+                       new_m = vm_page_alloc(new_object, new_offset);
+                       vm_object_unlock(new_object);
+
+                       if (new_m != VM_PAGE_NULL) {
+                               break;
+                       }
+
+                       VM_PAGE_WAIT(NULL);
+               }
        }
 
        if (flush) {
@@ -354,26 +236,33 @@ vm_pageout_setup(
        vm_page_lock_queues();
        vm_stat.pageouts++;
        if (m->laundry) {
+
                /*
-                *      vm_pageout_scan is telling us to put this page
-                *      at the front of the inactive queue, so it will
-                *      be immediately paged out to the default pager.
+                *      The caller is telling us that it is going to
+                *      immediately double page this page to the default
+                *      pager.
                 */
 
                assert(!old_object->internal);
                m->laundry = FALSE;
-
-               queue_enter_first(&vm_page_queue_inactive, m,
-                                 vm_page_t, pageq);
-               m->inactive = TRUE;
-               vm_page_inactive_count++;
        } else if (old_object->internal) {
                m->laundry = TRUE;
                vm_page_laundry_count++;
 
                vm_page_wire(m);
-       } else
+       } else {
                vm_page_activate(m);
+
+               /*
+                *      If vm_page_external_pagedout is negative,
+                *      the pageout daemon isn't expecting to be
+                *      notified.
+                */
+
+               if (vm_page_external_pagedout >= 0) {
+                       vm_page_external_pagedout++;
+               }
+       }
        vm_page_unlock_queues();
 
        /*
@@ -504,49 +393,35 @@ vm_pageout_page(
 
 /*
  *     vm_pageout_scan does the dirty work for the pageout daemon.
- *     It returns with vm_page_queue_free_lock held and
- *     vm_page_free_wanted == 0.
+ *
+ *     Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ *     in which case should_wait indicates whether the pageout daemon
+ *     should wait to allow pagers to keep up.
+ *
+ *     It returns with vm_page_queue_free_lock held.
  */
 
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
 {
-       unsigned int burst_count;
-       unsigned int want_pages;
+       boolean_t done;
 
        /*
-        *      We want to gradually dribble pages from the active queue
-        *      to the inactive queue.  If we let the inactive queue get
-        *      very small, and then suddenly dump many pages into it,
-        *      those pages won't get a sufficient chance to be referenced
-        *      before we start taking them from the inactive queue.
-        *
-        *      We must limit the rate at which we send pages to the pagers.
-        *      data_write messages consume memory, for message buffers and
-        *      for map-copy objects.  If we get too far ahead of the pagers,
-        *      we can potentially run out of memory.
-        *
-        *      We can use the laundry count to limit directly the number
-        *      of pages outstanding to the default pager.  A similar
-        *      strategy for external pagers doesn't work, because
-        *      external pagers don't have to deallocate the pages sent them,
-        *      and because we might have to send pages to external pagers
-        *      even if they aren't processing writes.  So we also
-        *      use a burst count to limit writes to external pagers.
-        *
-        *      When memory is very tight, we can't rely on external pagers to
-        *      clean pages.  They probably aren't running, because they
-        *      aren't vm-privileged.  If we kept sending dirty pages to them,
-        *      we could exhaust the free list.  However, we can't just ignore
-        *      pages belonging to external objects, because there might be no
-        *      pages belonging to internal objects.  Hence, we get the page
-        *      into an internal object and then immediately double-page it,
-        *      sending it to the default pager.
-        *
-        *      slab_collect should be last, because the other operations
-        *      might return memory to caches.  When we pause we use
-        *      vm_pageout_scan_continue as our continuation, so we will
-        *      reenter vm_pageout_scan periodically and attempt to reclaim
-        *      internal memory even if we never reach vm_page_free_target.
+        *      Try balancing pages among segments first, since this
+        *      may be enough to resume unprivileged allocations.
+        */
+
+       /* This function returns with vm_page_queue_free_lock held */
+       done = vm_page_balance();
+
+       if (done) {
+               return TRUE;
+       }
+
+       simple_unlock(&vm_page_queue_free_lock);
+
+       /*
+        *      Balancing is not enough. Shrink caches and scan pages
+        *      for eviction.
         */
 
        stack_collect();
@@ -555,428 +430,65 @@ void vm_pageout_scan(void)
        if (0)  /* XXX: pcb_collect doesn't do anything yet, so it is
                   pointless to call consider_thread_collect.  */
        consider_thread_collect();
-       slab_collect();
-
-       for (burst_count = 0;;) {
-               vm_page_t m;
-               vm_object_t object;
-               unsigned long free_count;
-
-               /*
-                *      Recalculate vm_page_inactivate_target.
-                */
-
-               vm_page_lock_queues();
-               vm_page_inactive_target =
-                       VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
-                                               vm_page_inactive_count);
-
-               /*
-                *      Move pages from active to inactive.
-                */
-
-               while ((vm_page_inactive_count < vm_page_inactive_target) &&
-                      !queue_empty(&vm_page_queue_active)) {
-                       vm_object_t obj;
-
-                       vm_pageout_active++;
-                       m = (vm_page_t) queue_first(&vm_page_queue_active);
-                       assert(m->active && !m->inactive);
-
-                       obj = m->object;
-                       if (!vm_object_lock_try(obj)) {
-                               /*
-                                *      Move page to end and continue.
-                                */
-
-                               queue_remove(&vm_page_queue_active, m,
-                                            vm_page_t, pageq);
-                               queue_enter(&vm_page_queue_active, m,
-                                           vm_page_t, pageq);
-                               vm_page_unlock_queues();
-                               vm_page_lock_queues();
-                               continue;
-                       }
-
-                       /*
-                        *      If the page is busy, then we pull it
-                        *      off the active queue and leave it alone.
-                        */
-
-                       if (m->busy) {
-                               vm_object_unlock(obj);
-                               queue_remove(&vm_page_queue_active, m,
-                                            vm_page_t, pageq);
-                               m->active = FALSE;
-                               vm_page_active_count--;
-                               continue;
-                       }
-
-                       /*
-                        *      Deactivate the page while holding the object
-                        *      locked, so we know the page is still not busy.
-                        *      This should prevent races between pmap_enter
-                        *      and pmap_clear_reference.  The page might be
-                        *      absent or fictitious, but vm_page_deactivate
-                        *      can handle that.
-                        */
-
-                       vm_page_deactivate(m);
-                       vm_object_unlock(obj);
-               }
-
-               /*
-                *      We are done if we have met our targets *and*
-                *      nobody is still waiting for a page.
-                */
-
-               simple_lock(&vm_page_queue_free_lock);
-               free_count = vm_page_mem_free();
-               if ((free_count >= vm_page_free_target) &&
-                   (vm_page_external_count <= vm_page_external_target) &&
-                   (vm_page_free_wanted == 0)) {
-                       vm_page_unlock_queues();
-                       break;
-               }
-               want_pages = ((free_count < vm_page_free_target) ||
-                             vm_page_free_wanted);
-               simple_unlock(&vm_page_queue_free_lock);
-
-               /*
-                * Sometimes we have to pause:
-                *      1) No inactive pages - nothing to do.
-                *      2) Flow control - wait for pagers to catch up.
-                *      3) Extremely low memory - sending out dirty pages
-                *      consumes memory.  We don't take the risk of doing
-                *      this if the default pager already has work to do.
-                */
-       pause:
-               if (queue_empty(&vm_page_queue_inactive) ||
-                   (burst_count >= vm_pageout_burst_max) ||
-                   (vm_page_laundry_count >= vm_pageout_burst_max) ||
-                   ((free_count < vm_pageout_reserved_really) &&
-                    (vm_page_laundry_count > 0))) {
-                       unsigned int pages, msecs;
-
-                       /*
-                        *      vm_pageout_burst_wait is msecs/page.
-                        *      If there is nothing for us to do, we wait
-                        *      at least vm_pageout_empty_wait msecs.
-                        */
-
-                       if (vm_page_laundry_count > burst_count)
-                               pages = vm_page_laundry_count;
-                       else
-                               pages = burst_count;
-                       msecs = pages * vm_pageout_burst_wait;
-
-                       if (queue_empty(&vm_page_queue_inactive) &&
-                           (msecs < vm_pageout_empty_wait))
-                               msecs = vm_pageout_empty_wait;
-                       vm_page_unlock_queues();
-
-                       thread_will_wait_with_timeout(current_thread(), msecs);
-                       counter(c_vm_pageout_scan_block++);
-                       thread_block(vm_pageout_scan_continue);
-                       call_continuation(vm_pageout_scan_continue);
-                       /*NOTREACHED*/
-               }
-
-               vm_pageout_inactive++;
-
-               /* Find a page we are interested in paging out.  If we
-                  need pages, then we'll page anything out; otherwise
-                  we only page out external pages. */
-               m = (vm_page_t) queue_first (&vm_page_queue_inactive);
-               while (1)
-                 {
-                   assert (!m->active && m->inactive);
-                   if (want_pages || m->external)
-                     break;
-                   
-                   m = (vm_page_t) queue_next (&m->pageq);
-                   if (!m)
-                     goto pause;
-                 }
-               
-               object = m->object;
-
-               /*
-                *      Try to lock object; since we've got the
-                *      page queues lock, we can only try for this one.
-                */
 
-               if (!vm_object_lock_try(object)) {
-                       /*
-                        *      Move page to end and continue.
-                        */
-
-                       queue_remove(&vm_page_queue_inactive, m,
-                                    vm_page_t, pageq);
-                       queue_enter(&vm_page_queue_inactive, m,
-                                   vm_page_t, pageq);
-                       vm_page_unlock_queues();
-                       vm_pageout_inactive_nolock++;
-                       continue;
-               }
-
-               /*
-                *      Remove the page from the inactive list.
-                */
-
-               queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
-               vm_page_inactive_count--;
-               m->inactive = FALSE;
-
-               if (m->busy || !object->alive) {
-                       /*
-                        *      Somebody is already playing with this page.
-                        *      Leave it off the pageout queues.
-                        */
-
-                       vm_page_unlock_queues();
-                       vm_object_unlock(object);
-                       vm_pageout_inactive_busy++;
-                       continue;
-               }
-
-               /*
-                *      If it's absent, we can reclaim the page.
-                */
-
-               if (want_pages && m->absent) {
-                       vm_pageout_inactive_absent++;
-                   reclaim_page:
-                       vm_page_free(m);
-                       vm_page_unlock_queues();
-
-                       if (vm_object_collectable(object))
-                               vm_object_collect(object);
-                       else
-                               vm_object_unlock(object);
-
-                       continue;
-               }
-
-               /*
-                *      If it's being used, reactivate.
-                *      (Fictitious pages are either busy or absent.)
-                */
-
-               assert(!m->fictitious);
-               if (m->reference || pmap_is_referenced(m->phys_addr)) {
-                       vm_object_unlock(object);
-                       vm_page_activate(m);
-                       vm_stat.reactivations++;
-                       current_task()->reactivations++;
-                       vm_page_unlock_queues();
-                       vm_pageout_inactive_used++;
-                       continue;
-               }
-
-               /*
-                *      Eliminate all mappings.
-                */
-
-               m->busy = TRUE;
-               pmap_page_protect(m->phys_addr, VM_PROT_NONE);
-               if (!m->dirty)
-                       m->dirty = pmap_is_modified(m->phys_addr);
-
-               if (m->external) {
-                       /* Figure out if we still care about this
-                       page in the limit of externally managed pages.
-                       Clean pages don't actually cause system hosage,
-                       so it's ok to stop considering them as
-                       "consumers" of memory. */
-                       if (m->dirty && !m->extcounted) {
-                               m->extcounted = TRUE;
-                               vm_page_external_count++;
-                       } else if (!m->dirty && m->extcounted) {
-                               m->extcounted = FALSE;
-                               vm_page_external_count--;
-                       }
-               }
-               
-               /* If we don't actually need more memory, and the page
-                  is not dirty, put it on the tail of the inactive queue
-                  and move on to the next page. */
-               if (!want_pages && !m->dirty) {
-                       queue_remove (&vm_page_queue_inactive, m, 
-                                     vm_page_t, pageq);
-                       queue_enter (&vm_page_queue_inactive, m,
-                                    vm_page_t, pageq);
-                       vm_page_unlock_queues();
-                       vm_pageout_inactive_cleaned_external++;
-                       continue;
-               }                       
-
-               /*
-                *      If it's clean and not precious, we can free the page.
-                */
-
-               if (!m->dirty && !m->precious) {
-                       vm_pageout_inactive_clean++;
-                       goto reclaim_page;
-               }
-
-               /*
-                *      If we are very low on memory, then we can't
-                *      rely on an external pager to clean a dirty page,
-                *      because external pagers are not vm-privileged.
-                *
-                *      The laundry bit tells vm_pageout_setup to
-                *      put the page back at the front of the inactive
-                *      queue instead of activating the page.  Hence,
-                *      we will pick the page up again immediately and
-                *      resend it to the default pager.
-                */
-
-               assert(!m->laundry);
-               if ((free_count < vm_pageout_reserved_internal) &&
-                   !object->internal) {
-                       m->laundry = TRUE;
-                       vm_pageout_inactive_double++;
-               }
-               vm_page_unlock_queues();
-
-               /*
-                *      If there is no memory object for the page, create
-                *      one and hand it to the default pager.
-                *      [First try to collapse, so we don't create
-                *      one unnecessarily.]
-                */
-
-               if (!object->pager_initialized)
-                       vm_object_collapse(object);
-               if (!object->pager_initialized)
-                       vm_object_pager_create(object);
-               if (!object->pager_initialized)
-                       panic("vm_pageout_scan");
-
-               vm_pageout_inactive_dirty++;
-               vm_pageout_page(m, FALSE, TRUE);        /* flush it */
-               vm_object_unlock(object);
-               burst_count++;
-       }
-}
-
-void vm_pageout_scan_continue(void)
-{
        /*
-        *      We just paused to let the pagers catch up.
-        *      If vm_page_laundry_count is still high,
-        *      then we aren't waiting long enough.
-        *      If we have paused some vm_pageout_pause_max times without
-        *      adjusting vm_pageout_burst_wait, it might be too big,
-        *      so we decrease it.
+        *      slab_collect should be last, because the other operations
+        *      might return memory to caches.
         */
+       slab_collect();
 
-       vm_page_lock_queues();
-       if (vm_page_laundry_count > vm_pageout_burst_min) {
-               vm_pageout_burst_wait++;
-               vm_pageout_pause_count = 0;
-       } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
-               vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
-               if (vm_pageout_burst_wait < 1)
-                       vm_pageout_burst_wait = 1;
-               vm_pageout_pause_count = 0;
-       }
-       vm_page_unlock_queues();
+       vm_page_refill_inactive();
 
-       vm_pageout_continue();
-       /*NOTREACHED*/
+       /* This function returns with vm_page_queue_free_lock held */
+       return vm_page_evict(should_wait);
 }
 
-/*
- *     vm_pageout is the high level pageout daemon.
- */
-
-void vm_pageout_continue(void)
+void vm_pageout(void)
 {
-       /*
-        *      The pageout daemon is never done, so loop forever.
-        *      We should call vm_pageout_scan at least once each
-        *      time we are woken, even if vm_page_free_wanted is
-        *      zero, to check vm_page_free_target and
-        *      vm_page_inactive_target.
-        */
+       boolean_t done, should_wait;
+
+       current_thread()->vm_privilege = 1;
+       stack_privilege(current_thread());
+       thread_set_own_priority(0);
 
        for (;;) {
-               vm_pageout_scan();
+               done = vm_pageout_scan(&should_wait);
                /* we hold vm_page_queue_free_lock now */
-               assert(vm_page_free_wanted == 0);
 
-               assert_wait(&vm_page_free_wanted, FALSE);
-               simple_unlock(&vm_page_queue_free_lock);
-               counter(c_vm_pageout_block++);
-               thread_block(vm_pageout_continue);
+               if (done) {
+                       thread_sleep(&vm_pageout_requested,
+                                    simple_lock_addr(vm_page_queue_free_lock),
+                                    FALSE);
+               } else if (should_wait) {
+                       assert_wait(&vm_pageout_continue, FALSE);
+                       thread_set_timeout(500);
+                       simple_unlock(&vm_page_queue_free_lock);
+                       thread_block(NULL);
+               } else {
+                       simple_unlock(&vm_page_queue_free_lock);
+               }
        }
 }
 
-void vm_pageout(void)
+/*
+ *     Start pageout
+ *
+ *     The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
 {
-       unsigned long free_after_reserve;
-
-       current_thread()->vm_privilege = TRUE;
-       stack_privilege(current_thread());
-       thread_set_own_priority(0);
-
-       /*
-        *      Initialize some paging parameters.
-        */
-
-       if (vm_pageout_burst_max == 0)
-               vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
-       if (vm_pageout_burst_min == 0)
-               vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
-       if (vm_pageout_burst_wait == 0)
-               vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
-       if (vm_pageout_empty_wait == 0)
-               vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
-       if (vm_page_free_reserved == 0)
-               vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
-       if (vm_pageout_pause_max == 0)
-               vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
-       if (vm_pageout_reserved_internal == 0)
-               vm_pageout_reserved_internal =
-                       VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
-       if (vm_pageout_reserved_really == 0)
-               vm_pageout_reserved_really =
-                       VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
-       free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
-       if (vm_page_external_limit == 0)
-               vm_page_external_limit = 
-                       VM_PAGE_EXTERNAL_LIMIT (free_after_reserve);
-
-       if (vm_page_external_target == 0)
-               vm_page_external_target = 
-                       VM_PAGE_EXTERNAL_TARGET (free_after_reserve);
-
-       if (vm_page_free_min == 0)
-               vm_page_free_min = vm_page_free_reserved +
-                       VM_PAGE_FREE_MIN(free_after_reserve);
-
-       if (vm_page_free_target == 0)
-               vm_page_free_target = vm_page_free_reserved +
-                       VM_PAGE_FREE_TARGET(free_after_reserve);
-
-       if (vm_page_free_target < vm_page_free_min + 5)
-               vm_page_free_target = vm_page_free_min + 5;
+       if (!current_thread())
+               return;
 
-       /*
-        *      vm_pageout_scan will set vm_page_inactive_target.
-        */
+       thread_wakeup_one(&vm_pageout_requested);
+}
 
-       vm_pageout_continue();
-       /*NOTREACHED*/
+/*
+ *     Resume pageout
+ *
+ *     The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+       thread_wakeup_one(&vm_pageout_continue);
 }
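
For orientation, the reworked daemon above boils down to a small wait protocol: vm_pageout_scan() returns with the free page queue lock held and reports either "done" (sleep until vm_pageout_start() is called), "should wait" (a bounded sleep until vm_pageout_resume() signals that the last in-flight pageout completed), or neither (drop the lock and scan again). The following is a minimal userspace model of that protocol only, with pthread primitives standing in for the kernel lock, thread_sleep and assert_wait/thread_set_timeout; all names are invented for illustration, the 500 passed to thread_set_timeout is rendered as 500 ms here although the kernel unit may differ, and none of this is gnumach code.

  /* Userspace model of the pageout daemon's wait protocol (illustration only). */
  #include <pthread.h>
  #include <stdbool.h>
  #include <time.h>
  #include <unistd.h>

  static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ vm_page_queue_free_lock */
  static pthread_cond_t  requested = PTHREAD_COND_INITIALIZER;  /* ~ vm_pageout_requested    */
  static pthread_cond_t  resumed   = PTHREAD_COND_INITIALIZER;  /* ~ vm_pageout_continue     */

  /* Stand-in for vm_pageout_scan(): like the real function, it returns
     with free_lock held.  Here it always pretends the targets were met. */
  static bool scan(bool *should_wait)
  {
      pthread_mutex_lock(&free_lock);
      *should_wait = false;
      return true;
  }

  static void *pageout_daemon(void *arg)
  {
      (void)arg;
      for (;;) {
          bool should_wait;
          bool done = scan(&should_wait);     /* free_lock is now held */

          if (done) {
              /* Sleep until an allocator calls vm_pageout_start(). */
              pthread_cond_wait(&requested, &free_lock);
              pthread_mutex_unlock(&free_lock);
          } else if (should_wait) {
              /* Bounded sleep until vm_pageout_resume(), modelled as 500 ms. */
              struct timespec ts;
              clock_gettime(CLOCK_REALTIME, &ts);
              ts.tv_nsec += 500L * 1000000L;
              ts.tv_sec  += ts.tv_nsec / 1000000000L;
              ts.tv_nsec %= 1000000000L;
              pthread_cond_timedwait(&resumed, &free_lock, &ts);
              pthread_mutex_unlock(&free_lock);
          } else {
              pthread_mutex_unlock(&free_lock);
          }
      }
      return NULL;
  }

  int main(void)
  {
      pthread_t t;
      pthread_create(&t, NULL, pageout_daemon, NULL);

      /* The vm_pageout_start() path: wake the daemon while holding the lock. */
      pthread_mutex_lock(&free_lock);
      pthread_cond_signal(&requested);
      pthread_mutex_unlock(&free_lock);

      sleep(1);
      return 0;
  }

The property the model tries to mirror is that both wakeup paths run under the same lock the daemon holds when it decides to sleep, so a wakeup cannot slip in between that decision and the block.
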
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
index ea6cfaf..6ddd821 100644
--- a/vm/vm_pageout.h
+++ b/vm/vm_pageout.h
@@ -46,8 +46,8 @@ extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t);
 
 extern void vm_pageout(void) __attribute__((noreturn));
 
-extern void vm_pageout_continue(void) __attribute__((noreturn));
+extern void vm_pageout_start(void);
 
-extern void vm_pageout_scan_continue(void) __attribute__((noreturn));
+extern void vm_pageout_resume(void);
 
 #endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index ed867f5..e3e34dc 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -39,6 +39,7 @@
 #include <mach/vm_prot.h>
 #include <kern/counters.h>
 #include <kern/debug.h>
+#include <kern/list.h>
 #include <kern/sched_prim.h>
 #include <kern/task.h>
 #include <kern/thread.h>
@@ -92,25 +93,16 @@ typedef struct {
 } vm_page_bucket_t;
 
 vm_page_bucket_t *vm_page_buckets;             /* Array of buckets */
-unsigned int   vm_page_bucket_count = 0;       /* How big is array? */
-unsigned int   vm_page_hash_mask;              /* Mask for hash function */
+unsigned long  vm_page_bucket_count = 0;       /* How big is array? */
+unsigned long  vm_page_hash_mask;              /* Mask for hash function */
 
-vm_page_t      vm_page_queue_fictitious;
+static struct list     vm_page_queue_fictitious;
 decl_simple_lock_data(,vm_page_queue_free_lock)
-unsigned int   vm_page_free_wanted;
 int            vm_page_fictitious_count;
-int            vm_page_external_count;
 int            vm_object_external_count;
 int            vm_object_external_pages;
 
 /*
- * This variable isn't directly used. It's merely a placeholder for the
- * address used to synchronize threads waiting for pages to become
- * available. The real value is returned by vm_page_free_mem().
- */
-unsigned int   vm_page_free_avail;
-
-/*
  *     Occasionally, the virtual memory system uses
  *     resident page structures that do not refer to
  *     real pages, for example to leave a page with
@@ -127,7 +119,7 @@ struct kmem_cache   vm_page_cache;
  *     For debugging, this should be a strange value
  *     that the pmap module can recognize in assertions.
  */
-vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
+phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
 
 /*
  *     Resident page structures are also chained on
@@ -136,8 +128,6 @@ vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
  *     defined here, but are shared by the pageout
  *     module.
  */
-queue_head_t   vm_page_queue_active;
-queue_head_t   vm_page_queue_inactive;
 decl_simple_lock_data(,vm_page_queue_lock)
 int    vm_page_active_count;
 int    vm_page_inactive_count;
@@ -149,12 +139,8 @@ int        vm_page_wire_count;
  *     (done here in vm_page_alloc) can trigger the
  *     pageout daemon.
  */
-int    vm_page_free_target = 0;
-int    vm_page_free_min = 0;
-int    vm_page_inactive_target = 0;
-int    vm_page_free_reserved = 0;
 int    vm_page_laundry_count = 0;
-int    vm_page_external_limit = 0;
+int    vm_page_external_pagedout = 0;
 
 
 /*
@@ -192,11 +178,7 @@ void vm_page_bootstrap(
        simple_lock_init(&vm_page_queue_free_lock);
        simple_lock_init(&vm_page_queue_lock);
 
-       vm_page_queue_fictitious = VM_PAGE_NULL;
-       queue_init(&vm_page_queue_active);
-       queue_init(&vm_page_queue_inactive);
-
-       vm_page_free_wanted = 0;
+       list_init(&vm_page_queue_fictitious);
 
        /*
         *      Allocate (and initialize) the virtual-to-physical
@@ -209,7 +191,7 @@ void vm_page_bootstrap(
         */
 
        if (vm_page_bucket_count == 0) {
-               unsigned int npages = pmap_free_pages();
+               unsigned long npages = vm_page_table_size();
 
                vm_page_bucket_count = 1;
                while (vm_page_bucket_count < npages)
@@ -331,6 +313,7 @@ void                vm_page_module_init(void)
  *     table and object list.
  *
  *     The object and page must be locked.
+ *     The free page queue must not be locked.
  */
 
 void vm_page_insert(
@@ -342,6 +325,14 @@ void vm_page_insert(
 
        VM_PAGE_CHECK(mem);
 
+       assert(!mem->active && !mem->inactive);
+       assert(!mem->external);
+
+       if (!object->internal) {
+               mem->external = TRUE;
+               vm_object_external_pages++;
+       }
+
        if (mem->tabled)
                panic("vm_page_insert");
 
@@ -390,10 +381,6 @@ void vm_page_insert(
                        vm_page_deactivate(last_mem);
        }
        object->last_alloc = offset;
-
-       if (!object->internal) {
-               vm_object_external_pages++;
-       }
 }
 
 /*
@@ -404,6 +391,7 @@ void vm_page_insert(
  *     and we don't do deactivate-behind.
  *
  *     The object and page must be locked.
+ *     The free page queue must not be locked.
  */
 
 void vm_page_replace(
@@ -415,6 +403,14 @@ void vm_page_replace(
 
        VM_PAGE_CHECK(mem);
 
+       assert(!mem->active && !mem->inactive);
+       assert(!mem->external);
+
+       if (!object->internal) {
+               mem->external = TRUE;
+               vm_object_external_pages++;
+       }
+
        if (mem->tabled)
                panic("vm_page_replace");
 
@@ -446,8 +442,10 @@ void vm_page_replace(
                                             listq);
                                m->tabled = FALSE;
                                object->resident_page_count--;
+                               VM_PAGE_QUEUES_REMOVE(m);
 
-                               if (!object->internal) {
+                               if (m->external) {
+                                       m->external = FALSE;
                                        vm_object_external_pages--;
                                }
 
@@ -483,19 +481,16 @@ void vm_page_replace(
 
        object->resident_page_count++;
        assert(object->resident_page_count != 0);
-
-       if (!object->internal) {
-               vm_object_external_pages++;
-       }
 }
 
 /*
  *     vm_page_remove:         [ internal use only ]
  *
  *     Removes the given mem entry from the object/offset-page
- *     table and the object page list.
+ *     table, the object page list, and the page queues.
  *
  *     The object and page must be locked.
+ *     The free page queue must not be locked.
  */
 
 void vm_page_remove(
@@ -543,7 +538,10 @@ void vm_page_remove(
 
        mem->tabled = FALSE;
 
-       if (!mem->object->internal) {
+       VM_PAGE_QUEUES_REMOVE(mem);
+
+       if (mem->external) {
+               mem->external = FALSE;
                vm_object_external_pages--;
        }
 }
@@ -656,11 +654,15 @@ vm_page_t vm_page_grab_fictitious(void)
        vm_page_t m;
 
        simple_lock(&vm_page_queue_free_lock);
-       m = vm_page_queue_fictitious;
-       if (m != VM_PAGE_NULL) {
-               vm_page_fictitious_count--;
-               vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
+       if (list_empty(&vm_page_queue_fictitious)) {
+               m = VM_PAGE_NULL;
+       } else {
+               m = list_first_entry(&vm_page_queue_fictitious,
+                                    struct vm_page, node);
+               assert(m->fictitious);
+               list_remove(&m->node);
                m->free = FALSE;
+               vm_page_fictitious_count--;
        }
        simple_unlock(&vm_page_queue_free_lock);
 
@@ -680,8 +682,7 @@ static void vm_page_release_fictitious(
        if (m->free)
                panic("vm_page_release_fictitious");
        m->free = TRUE;
-       m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
-       vm_page_queue_fictitious = m;
+       list_insert_head(&vm_page_queue_fictitious, &m->node);
        vm_page_fictitious_count++;
        simple_unlock(&vm_page_queue_free_lock);
 }
@@ -720,9 +721,7 @@ void vm_page_more_fictitious(void)
  *     The object referenced by *MP must be locked.
  */
 
-boolean_t vm_page_convert(
-       struct vm_page **mp,
-       boolean_t external)
+boolean_t vm_page_convert(struct vm_page **mp)
 {
        struct vm_page *real_m, *fict_m;
        vm_object_t object;
@@ -735,7 +734,7 @@ boolean_t vm_page_convert(
        assert(!fict_m->active);
        assert(!fict_m->inactive);
 
-       real_m = vm_page_grab(external);
+       real_m = vm_page_grab();
        if (real_m == VM_PAGE_NULL)
                return FALSE;
 
@@ -766,27 +765,21 @@ boolean_t vm_page_convert(
  *     Returns VM_PAGE_NULL if the free list is too small.
  */
 
-vm_page_t vm_page_grab(
-       boolean_t external)
+vm_page_t vm_page_grab(void)
 {
        vm_page_t       mem;
 
        simple_lock(&vm_page_queue_free_lock);
 
        /*
-        *      Only let privileged threads (involved in pageout)
-        *      dip into the reserved pool or exceed the limit
-        *      for externally-managed pages.
+        * XXX Mach has many modules that merely assume memory is
+        * directly mapped in kernel space. Instead of updating all
+        * users, we assume those which need specific physical memory
+        * properties will wire down their pages, either because
+        * they can't be paged (not part of an object), or with
+        * explicit VM calls. The strategy is then to let memory
+        * pressure balance the physical segments with pageable pages.
         */
-
-       if (((vm_page_mem_free() < vm_page_free_reserved)
-            || (external
-                && (vm_page_external_count > vm_page_external_limit)))
-           && !current_thread()->vm_privilege) {
-               simple_unlock(&vm_page_queue_free_lock);
-               return VM_PAGE_NULL;
-       }
-
        mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
 
        if (mem == NULL) {
@@ -794,35 +787,15 @@ vm_page_t vm_page_grab(
                return NULL;
        }
 
-       if (external)
-               vm_page_external_count++;
-
        mem->free = FALSE;
-       mem->extcounted = mem->external = external;
        simple_unlock(&vm_page_queue_free_lock);
 
-       /*
-        *      Decide if we should poke the pageout daemon.
-        *      We do this if the free count is less than the low
-        *      water mark, or if the free count is less than the high
-        *      water mark (but above the low water mark) and the inactive
-        *      count is less than its target.
-        *
-        *      We don't have the counts locked ... if they change a little,
-        *      it doesn't really matter.
-        */
-
-       if ((vm_page_mem_free() < vm_page_free_min) ||
-           ((vm_page_mem_free() < vm_page_free_target) &&
-            (vm_page_inactive_count < vm_page_inactive_target)))
-               thread_wakeup((event_t) &vm_page_free_wanted);
-
        return mem;
 }
 
-vm_offset_t vm_page_grab_phys_addr(void)
+phys_addr_t vm_page_grab_phys_addr(void)
 {
-       vm_page_t p = vm_page_grab(FALSE);
+       vm_page_t p = vm_page_grab();
        if (p == VM_PAGE_NULL)
                return -1;
        else
@@ -835,8 +808,9 @@ vm_offset_t vm_page_grab_phys_addr(void)
  *     Return a page to the free list.
  */
 
-static void vm_page_release(
+void vm_page_release(
        vm_page_t       mem,
+       boolean_t       laundry,
        boolean_t       external)
 {
        simple_lock(&vm_page_queue_free_lock);
@@ -844,33 +818,28 @@ static void vm_page_release(
                panic("vm_page_release");
        mem->free = TRUE;
        vm_page_free_pa(mem, 0);
-       if (external)
-               vm_page_external_count--;
+       if (laundry) {
+               vm_page_laundry_count--;
 
-       /*
-        *      Check if we should wake up someone waiting for page.
-        *      But don't bother waking them unless they can allocate.
-        *
-        *      We wakeup only one thread, to prevent starvation.
-        *      Because the scheduling system handles wait queues FIFO,
-        *      if we wakeup all waiting threads, one greedy thread
-        *      can starve multiple niceguy threads.  When the threads
-        *      all wakeup, the greedy threads runs first, grabs the page,
-        *      and waits for another page.  It will be the first to run
-        *      when the next page is freed.
-        *
-        *      However, there is a slight danger here.
-        *      The thread we wake might not use the free page.
-        *      Then the other threads could wait indefinitely
-        *      while the page goes unused.  To forestall this,
-        *      the pageout daemon will keep making free pages
-        *      as long as vm_page_free_wanted is non-zero.
-        */
+               if (vm_page_laundry_count == 0) {
+                       vm_pageout_resume();
+               }
+       }
+       if (external) {
 
-       if ((vm_page_free_wanted > 0) &&
-           (vm_page_mem_free() >= vm_page_free_reserved)) {
-               vm_page_free_wanted--;
-               thread_wakeup_one((event_t) &vm_page_free_avail);
+               /*
+                *      If vm_page_external_pagedout is negative,
+                *      the pageout daemon isn't expecting to be
+                *      notified.
+                */
+
+               if (vm_page_external_pagedout > 0) {
+                       vm_page_external_pagedout--;
+               }
+
+               if (vm_page_external_pagedout == 0) {
+                       vm_pageout_resume();
+               }
        }
 
        simple_unlock(&vm_page_queue_free_lock);
@@ -895,18 +864,6 @@ vm_page_t vm_page_grab_contig(
 
        simple_lock(&vm_page_queue_free_lock);
 
-       /*
-        *      Only let privileged threads (involved in pageout)
-        *      dip into the reserved pool or exceed the limit
-        *      for externally-managed pages.
-        */
-
-       if (((vm_page_mem_free() - nr_pages) <= vm_page_free_reserved)
-           && !current_thread()->vm_privilege) {
-               simple_unlock(&vm_page_queue_free_lock);
-               return VM_PAGE_NULL;
-       }
-
        /* TODO Allow caller to pass type */
        mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
 
@@ -917,27 +874,10 @@ vm_page_t vm_page_grab_contig(
 
        for (i = 0; i < nr_pages; i++) {
                mem[i].free = FALSE;
-               mem[i].extcounted = mem[i].external = 0;
        }
 
        simple_unlock(&vm_page_queue_free_lock);
 
-       /*
-        *      Decide if we should poke the pageout daemon.
-        *      We do this if the free count is less than the low
-        *      water mark, or if the free count is less than the high
-        *      water mark (but above the low water mark) and the inactive
-        *      count is less than its target.
-        *
-        *      We don't have the counts locked ... if they change a little,
-        *      it doesn't really matter.
-        */
-
-       if ((vm_page_mem_free() < vm_page_free_min) ||
-           ((vm_page_mem_free() < vm_page_free_target) &&
-            (vm_page_inactive_count < vm_page_inactive_target)))
-               thread_wakeup((event_t) &vm_page_free_wanted);
-
        return mem;
 }
 
@@ -965,53 +905,10 @@ void vm_page_free_contig(vm_page_t mem, vm_size_t size)
 
        vm_page_free_pa(mem, order);
 
-       if ((vm_page_free_wanted > 0) &&
-           (vm_page_mem_free() >= vm_page_free_reserved)) {
-               vm_page_free_wanted--;
-               thread_wakeup_one((event_t) &vm_page_free_avail);
-       }
-
        simple_unlock(&vm_page_queue_free_lock);
 }
 
 /*
- *     vm_page_wait:
- *
- *     Wait for a page to become available.
- *     If there are plenty of free pages, then we don't sleep.
- */
-
-void vm_page_wait(
-       void (*continuation)(void))
-{
-
-       /*
-        *      We can't use vm_page_free_reserved to make this
-        *      determination.  Consider: some thread might
-        *      need to allocate two pages.  The first allocation
-        *      succeeds, the second fails.  After the first page is freed,
-        *      a call to vm_page_wait must really block.
-        */
-
-       simple_lock(&vm_page_queue_free_lock);
-       if ((vm_page_mem_free() < vm_page_free_target)
-           || (vm_page_external_count > vm_page_external_limit)) {
-               if (vm_page_free_wanted++ == 0)
-                       thread_wakeup((event_t)&vm_page_free_wanted);
-               assert_wait((event_t)&vm_page_free_avail, FALSE);
-               simple_unlock(&vm_page_queue_free_lock);
-               if (continuation != 0) {
-                       counter(c_vm_page_wait_block_user++);
-                       thread_block(continuation);
-               } else {
-                       counter(c_vm_page_wait_block_kernel++);
-                       thread_block((void (*)(void)) 0);
-               }
-       } else
-               simple_unlock(&vm_page_queue_free_lock);
-}
-
-/*
  *     vm_page_alloc:
  *
  *     Allocate and return a memory cell associated
@@ -1026,7 +923,7 @@ vm_page_t vm_page_alloc(
 {
        vm_page_t       mem;
 
-       mem = vm_page_grab(!object->internal);
+       mem = vm_page_grab();
        if (mem == VM_PAGE_NULL)
                return VM_PAGE_NULL;
 
@@ -1051,9 +948,11 @@ void vm_page_free(
        if (mem->free)
                panic("vm_page_free");
 
-       if (mem->tabled)
+       if (mem->tabled) {
                vm_page_remove(mem);
-       VM_PAGE_QUEUES_REMOVE(mem);
+       }
+
+       assert(!mem->active && !mem->inactive);
 
        if (mem->wire_count != 0) {
                if (!mem->private && !mem->fictitious)
@@ -1061,11 +960,6 @@ void vm_page_free(
                mem->wire_count = 0;
        }
 
-       if (mem->laundry) {
-               vm_page_laundry_count--;
-               mem->laundry = FALSE;
-       }
-
        PAGE_WAKEUP_DONE(mem);
 
        if (mem->absent)
@@ -1082,117 +976,10 @@ void vm_page_free(
                mem->fictitious = TRUE;
                vm_page_release_fictitious(mem);
        } else {
-               int external = mem->external && mem->extcounted;
+               boolean_t laundry = mem->laundry;
+               boolean_t external = mem->external;
                vm_page_init(mem);
-               vm_page_release(mem, external);
-       }
-}
-
-/*
- *     vm_page_wire:
- *
- *     Mark this page as wired down by yet
- *     another map, removing it from paging queues
- *     as necessary.
- *
- *     The page's object and the page queues must be locked.
- */
-void vm_page_wire(
-       vm_page_t       mem)
-{
-       VM_PAGE_CHECK(mem);
-
-       if (mem->wire_count == 0) {
-               VM_PAGE_QUEUES_REMOVE(mem);
-               if (!mem->private && !mem->fictitious)
-                       vm_page_wire_count++;
-       }
-       mem->wire_count++;
-}
-
-/*
- *     vm_page_unwire:
- *
- *     Release one wiring of this page, potentially
- *     enabling it to be paged again.
- *
- *     The page's object and the page queues must be locked.
- */
-void vm_page_unwire(
-       vm_page_t       mem)
-{
-       VM_PAGE_CHECK(mem);
-
-       if (--mem->wire_count == 0) {
-               queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
-               vm_page_active_count++;
-               mem->active = TRUE;
-               if (!mem->private && !mem->fictitious)
-                       vm_page_wire_count--;
-       }
-}
-
-/*
- *     vm_page_deactivate:
- *
- *     Returns the given page to the inactive list,
- *     indicating that no physical maps have access
- *     to this page.  [Used by the physical mapping system.]
- *
- *     The page queues must be locked.
- */
-void vm_page_deactivate(
-       vm_page_t       m)
-{
-       VM_PAGE_CHECK(m);
-
-       /*
-        *      This page is no longer very interesting.  If it was
-        *      interesting (active or inactive/referenced), then we
-        *      clear the reference bit and (re)enter it in the
-        *      inactive queue.  Note wired pages should not have
-        *      their reference bit cleared.
-        */
-
-       if (m->active || (m->inactive && m->reference)) {
-               if (!m->fictitious && !m->absent)
-                       pmap_clear_reference(m->phys_addr);
-               m->reference = FALSE;
-               VM_PAGE_QUEUES_REMOVE(m);
-       }
-       if (m->wire_count == 0 && !m->inactive) {
-               queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
-               m->inactive = TRUE;
-               vm_page_inactive_count++;
-       }
-}
-
-/*
- *     vm_page_activate:
- *
- *     Put the specified page on the active list (if appropriate).
- *
- *     The page queues must be locked.
- */
-
-void vm_page_activate(
-       vm_page_t       m)
-{
-       VM_PAGE_CHECK(m);
-
-       if (m->inactive) {
-               queue_remove(&vm_page_queue_inactive, m, vm_page_t,
-                                               pageq);
-               vm_page_inactive_count--;
-               m->inactive = FALSE;
-       }
-       if (m->wire_count == 0) {
-               if (m->active)
-                       panic("vm_page_activate: already active");
-
-               queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
-               m->active = TRUE;
-               vm_page_active_count++;
+               vm_page_release(mem, laundry, external);
        }
 }
 
diff --git a/xen/block.c b/xen/block.c
index 7d6f1ca..2568598 100644
--- a/xen/block.c
+++ b/xen/block.c
@@ -457,7 +457,7 @@ device_read (void *d, ipc_port_t reply_port,
       /* Allocate pages.  */
       while (alloc_offset < offset + len)
        {
-         while ((m = vm_page_grab (FALSE)) == 0)
+         while ((m = vm_page_grab ()) == 0)
            VM_PAGE_WAIT (0);
          assert (! m->active && ! m->inactive);
          m->busy = TRUE;
@@ -568,7 +568,10 @@ device_write(void *d, ipc_port_t reply_port,
 {
   io_return_t err = 0;
   vm_map_copy_t copy = (vm_map_copy_t) data;
-  vm_offset_t aligned_buffer = 0;
+  vm_offset_t buffer = 0;
+  char *map_data;
+  vm_offset_t map_addr;
+  vm_size_t map_size;
   unsigned copy_npages = atop(round_page(count));
   vm_offset_t phys_addrs[copy_npages];
   struct block_data *bd = d;
@@ -576,6 +579,7 @@ device_write(void *d, ipc_port_t reply_port,
   grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   unsigned reqn, size;
   unsigned i, nbpages, j;
+  kern_return_t kr;
 
   if (!(bd->mode & D_WRITE))
     return D_READ_ONLY;
@@ -591,31 +595,24 @@ device_write(void *d, ipc_port_t reply_port,
   if (count > copy->size)
     return D_INVALID_SIZE;
 
-  if (copy->type != VM_MAP_COPY_PAGE_LIST || copy->offset & PAGE_MASK) {
-    /* Unaligned write.  Has to copy data before passing it to the backend.  */
-    kern_return_t kr;
-    vm_offset_t buffer;
-
-    kr = kmem_alloc(device_io_map, &aligned_buffer, count);
-    if (kr != KERN_SUCCESS)
-      return kr;
-
-    kr = vm_map_copyout(device_io_map, &buffer, vm_map_copy_copy(copy));
-    if (kr != KERN_SUCCESS) {
-      kmem_free(device_io_map, aligned_buffer, count);
-      return kr;
-    }
-
-    memcpy((void*) aligned_buffer, (void*) buffer, count);
+  /* XXX The underlying physical pages of the mapping could be highmem,
+     for which drivers require the use of a bounce buffer.  */
+  kr = kmem_alloc(device_io_map, &buffer, count);
+  if (kr != KERN_SUCCESS)
+    return kr;
+
+  kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+                          &map_addr, &map_size, copy, count);
+  if (kr != KERN_SUCCESS) {
+    kmem_free(device_io_map, buffer, count);
+    return kr;
+  }
 
-    vm_deallocate (device_io_map, buffer, count);
+  memcpy((void *)buffer, map_data, count);
+  kmem_io_map_deallocate(device_io_map, map_addr, map_size);
 
-    for (i = 0; i < copy_npages; i++)
-      phys_addrs[i] = kvtophys(aligned_buffer + ptoa(i));
-  } else {
-    for (i = 0; i < copy_npages; i++)
-      phys_addrs[i] = copy->cpy_page_list[i]->phys_addr;
-  }
+  for (i = 0; i < copy_npages; i++)
+    phys_addrs[i] = kvtophys(buffer + ptoa(i));
 
   for (i=0; i<copy_npages; i+=nbpages) {
 
@@ -674,8 +671,8 @@ device_write(void *d, ipc_port_t reply_port,
     }
   }
 
-  if (aligned_buffer)
-    kmem_free(device_io_map, aligned_buffer, count);
+  if (buffer)
+    kmem_free(device_io_map, buffer, count);
 
   vm_map_copy_discard (copy);
 
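
The rewritten device_write above always stages the caller's data in a freshly allocated kernel buffer before handing per-page physical addresses to the backend; as the XXX comment notes, the pages backing the copy object could be highmem, which the driver cannot pass through directly. A userspace analogue of that staging pattern is sketched below, with malloc standing in for kmem_alloc and an identity mapping standing in for kvtophys; every name in it is invented for illustration and it is not gnumach code.

  /* Userspace analogue of the bounce-buffer staging in the hunk above. */
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #define PAGE_SIZE 4096UL

  /* Fake virtual-to-physical translation, identity-mapped in this model. */
  static uintptr_t fake_kvtophys(const void *p)
  {
      return (uintptr_t)p;
  }

  static int stage_write(const void *data, size_t count)
  {
      size_t npages = (count + PAGE_SIZE - 1) / PAGE_SIZE;
      uintptr_t phys[npages];

      /* 1. Allocate the bounce buffer (kmem_alloc in the hunk). */
      char *buffer = malloc(npages * PAGE_SIZE);
      if (buffer == NULL)
          return -1;

      /* 2. Copy the caller's data into it (kmem_io_map_copyout + memcpy). */
      memcpy(buffer, data, count);

      /* 3. Hand out one physical address per page of the bounce buffer. */
      for (size_t i = 0; i < npages; i++)
          phys[i] = fake_kvtophys(buffer + i * PAGE_SIZE);

      printf("%zu page(s) staged, first phys %#lx\n",
             npages, (unsigned long)phys[0]);

      /* 4. ...submit the requests, then release the buffer (kmem_free). */
      free(buffer);
      return 0;
  }

  int main(void)
  {
      char payload[512] = "example block payload";
      return stage_write(payload, sizeof payload);
  }

The transmit path in xen/net.c further down applies the same staging, which is why both hunks drop their old page-list special cases.
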
diff --git a/xen/console.c b/xen/console.c
index 9e8db8f..aed63cb 100644
--- a/xen/console.c
+++ b/xen/console.c
@@ -116,8 +116,8 @@ static void hypcnintr(int unit, spl_t spl, void *ret_addr, void *regs) {
                mb();
                console->in_cons++;
 #if    MACH_KDB
-               if (c == (char)'£') {
-                       printf("£ pressed\n");
+               if (c == (char)0xA3) {
+                       printf("pound pressed\n");
                        kdb_kintr();
                        continue;
                }
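
The console hunk above swaps a non-ASCII '£' literal for the explicit byte 0xA3 (the pound sign in Latin-1). The cast on the constant matters because char is signed on i386, so the received byte and a plain integer constant above 0x7F would otherwise compare unequal; the small standalone demonstration below (not gnumach code) shows the difference.

  /* Why the (char) cast on 0xA3 matters when char is signed. */
  #include <stdio.h>

  int main(void)
  {
      char c = (char)0xA3;    /* byte as it arrives from the console ring */

      /* With a signed 8-bit char, c holds -93, so comparing against the
         plain int constant 0xA3 (163) is false... */
      printf("c == 0xA3       : %d\n", c == 0xA3);

      /* ...while casting the constant brings both sides to the same value. */
      printf("c == (char)0xA3 : %d\n", c == (char)0xA3);
      return 0;
  }
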
diff --git a/xen/net.c b/xen/net.c
index 5a3f90d..1112138 100644
--- a/xen/net.c
+++ b/xen/net.c
@@ -29,6 +29,7 @@
 #include <device/net_io.h>
 #include <device/device_reply.user.h>
 #include <device/device_emul.h>
+#include <device/ds_routines.h>
 #include <intel/pmap.h>
 #include <xen/public/io/netif.h>
 #include <xen/public/memory.h>
@@ -601,9 +602,11 @@ device_write(void *d, ipc_port_t reply_port,
        struct ifnet *ifp = &nd->ifnet;
        netif_tx_request_t *req;
        unsigned reqn;
-       vm_offset_t offset;
-       vm_page_t m;
-       vm_size_t size;
+       vm_offset_t buffer;
+       char *map_data;
+       vm_offset_t map_addr;
+       vm_size_t map_size;
+       kern_return_t kr;
 
        /* The maximum that we can handle.  */
        assert(ifp->if_header_size + ifp->if_mtu <= PAGE_SIZE);
@@ -617,26 +620,21 @@ device_write(void *d, ipc_port_t reply_port,
        assert(copy->cpy_npages <= 2);
        assert(copy->cpy_npages >= 1);
 
-       offset = copy->offset & PAGE_MASK;
-       if (paranoia || copy->cpy_npages == 2) {
-               /* have to copy :/ */
-               while ((m = vm_page_grab(FALSE)) == 0)
-                       VM_PAGE_WAIT (0);
-               assert (! m->active && ! m->inactive);
-               m->busy = TRUE;
+       kr = kmem_alloc(device_io_map, &buffer, count);
 
-               if (copy->cpy_npages == 1)
-                       size = count;
-               else
-                       size = PAGE_SIZE - offset;
+       if (kr != KERN_SUCCESS)
+               return kr;
+
+       kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+                                &map_addr, &map_size, copy, count);
 
-               memcpy((void*)phystokv(m->phys_addr), (void*)phystokv(copy->cpy_page_list[0]->phys_addr + offset), size);
-               if (copy->cpy_npages == 2)
-                       memcpy((void*)phystokv(m->phys_addr + size), (void*)phystokv(copy->cpy_page_list[1]->phys_addr), count - size);
+       if (kr != KERN_SUCCESS) {
+               kmem_free(device_io_map, buffer, count);
+               return kr;
+       }
 
-               offset = 0;
-       } else
-               m = copy->cpy_page_list[0];
+       memcpy((void *)buffer, map_data, count);
+       kmem_io_map_deallocate(device_io_map, map_addr, map_size);
 
        /* allocate a request */
        spl_t spl = splimp();
@@ -653,8 +651,8 @@ device_write(void *d, ipc_port_t reply_port,
        (void) splx(spl);
 
        req = RING_GET_REQUEST(&nd->tx, reqn);
-       req->gref = gref = hyp_grant_give(nd->domid, atop(m->phys_addr), 1);
-       req->offset = offset;
+       req->gref = gref = hyp_grant_give(nd->domid, atop(kvtophys(buffer)), 1);
+       req->offset = 0;
        req->flags = 0;
        req->id = gref;
        req->size = count;
@@ -685,11 +683,11 @@ device_write(void *d, ipc_port_t reply_port,
              /* Suitable for Ethernet only.  */
              header = (struct ether_header *) (net_kmsg (kmsg)->header);
              packet = (struct packet_header *) (net_kmsg (kmsg)->packet);
-             memcpy (header, (void*)phystokv(m->phys_addr + offset), sizeof (struct ether_header));
+             memcpy (header, (void*)buffer, sizeof (struct ether_header));
        
              /* packet is prefixed with a struct packet_header,
                 see include/device/net_status.h.  */
-             memcpy (packet + 1, (void*)phystokv(m->phys_addr + offset + sizeof (struct ether_header)),
+             memcpy (packet + 1, (void*)buffer + sizeof (struct ether_header),
                      count - sizeof (struct ether_header));
              packet->length = count - sizeof (struct ether_header)
                               + sizeof (struct packet_header);
@@ -702,8 +700,7 @@ device_write(void *d, ipc_port_t reply_port,
            }
        }
 
-       if (paranoia || copy->cpy_npages == 2)
-               VM_PAGE_FREE(m);
+       kmem_free(device_io_map, buffer, count);
 
        vm_map_copy_discard (copy);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hurd/gnumach.git


