Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions pkg/sentry/platform/kvm/bluepill_fault.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ import (
var (
// faultBlockSize is the size used for servicing memory faults.
//
// This should be large enough to avoid frequent faults and avoid using
// all available KVM slots (~512), but small enough that KVM does not
// complain about slot sizes (~4GB). See handleBluepillFault for how
// this block is used.
faultBlockSize = uintptr(2 << 30)
// This should be large enough so that the total number of slots
// required to cover the 47-bit virtual address space does not exceed
// the KVM slot limit (e.g. 32764). Linux doesn't allocate virtual
// address space above 47-bit by default.
// It must be small enough to limit the memory overhead associated with
// KVM slot allocation. For example, using a 46-bit address space
// results in an overhead of ~250 MB.
faultBlockSize = uintptr(8 << 30)

// faultBlockMask is the mask for the fault blocks.
//
Expand Down
10 changes: 9 additions & 1 deletion pkg/sentry/platform/kvm/physical_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func fillAddressSpace() (specialRegions []specialVirtualRegion) {
pSize := uintptr(1) << ring0.PhysicalAddressBits
pSize -= reservedMemory

maxUserAddr := uintptr(0)
// Add specifically excluded regions; see excludeVirtualRegion.
if err := applyVirtualRegions(func(vr virtualRegion) {
if excludeVirtualRegion(vr) {
Expand All @@ -81,10 +82,17 @@ func fillAddressSpace() (specialRegions []specialVirtualRegion) {
})
log.Infof("mmio: virtual [%x,%x)", vr.virtual, vr.virtual+vr.length)
}
if vr.filename != "[vsyscall]" {
maxUserAddr = vr.region.virtual + vr.region.length
}
}); err != nil {
panic(fmt.Sprintf("error parsing /proc/self/maps: %v", err))
}

var archRegions []specialVirtualRegion
vSize, archRegions = archSpecialRegions(vSize, maxUserAddr)
specialRegions = append(specialRegions, archRegions...)

// Do we need any more work?
if vSize < pSize {
return specialRegions
Expand All @@ -109,7 +117,7 @@ func fillAddressSpace() (specialRegions []specialVirtualRegion) {
current := required // Attempted mmap size.
filled := uintptr(0)
suggestedAddr := uintptr(0)
if ring0.VirtualAddressBits > 48 {
if extendedAddressSpaceAllowed && ring0.VirtualAddressBits > 48 {
// Pass a hint address above 47 bits to indicate to the kernel that
// we can handle, and want, mappings above 47 bits:
// https://docs.kernel.org/arch/x86/x86_64/5level-paging.html#user-space-and-large-virtual-address-space.
Expand Down
53 changes: 53 additions & 0 deletions pkg/sentry/platform/kvm/physical_map_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,62 @@

package kvm

import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/ring0"
)

const (
// reservedMemory is the size, in bytes, of the chunk of physical memory
// reserved starting at physical address zero (0x100000000 = 4 GiB).
// There are some special pages in this region, so the whole range is
// treated as unavailable instead of excluding those pages one by one.
reservedMemory = 0x100000000
)

const (
// defaultAddressSpaceSize is the default limit for the user virtual
// address space: 47 bits (2^47 bytes). The mmap syscall respects this
// limit by default, even when 5-level page tables are enabled.
defaultAddressSpaceSize = uintptr(1) << 47

// extendedAddressSpaceAllowed controls whether the address space beyond
// the default 47-bit limit may be used. It is set to false for several
// reasons:
//   - There are no known use cases requiring the extended address space.
//   - Restricting the size avoids the overhead of:
//     a) aligning the virtual address space size to the physical
//        address space size;
//     b) creating unnecessary page table entries for the unused
//        extended range.
//   - The memory slot size is currently configured to cover only the
//     default 47-bit address space.
//   - 5-level page table support was primarily introduced to work around
//     a specific kernel bug where the VDSO could be mapped above the
//     47-bit boundary (v6.9-rc1~186^2~7).
extendedAddressSpaceAllowed = false
)

// archSpecialRegions computes the amd64-specific virtual regions that must be
// excluded from the usable address space, and returns the virtual size that
// remains together with those regions.
//
// Linux doesn't map VMAs above the 47-bit boundary by default, so unless the
// extended address space is allowed (or vSize already fits within the default
// 47-bit space) the range above the boundary is carved out as one special
// region and its length is subtracted from vSize.
//
// maxUserAddr is the highest end address seen among existing user mappings.
// The excluded region begins at whichever of maxUserAddr and
// defaultAddressSpaceSize is larger; this works around a kernel bug where the
// vdso can be mapped above the 47-bit boundary. Note that the region's length
// is always ring0.MaximumUserAddress - defaultAddressSpaceSize, regardless of
// where the region begins.
func archSpecialRegions(vSize uintptr, maxUserAddr uintptr) (uintptr, []specialVirtualRegion) {
	if extendedAddressSpaceAllowed || vSize <= defaultAddressSpaceSize {
		// Nothing to carve out.
		return vSize, nil
	}

	// Never start the exclusion below the 47-bit boundary, but start it
	// later if an existing mapping (e.g. the vdso) already ends above it.
	start := maxUserAddr
	if start < defaultAddressSpaceSize {
		start = defaultAddressSpaceSize
	}

	excluded := region{
		virtual: start,
		length:  ring0.MaximumUserAddress - defaultAddressSpaceSize,
	}
	log.Infof("excluded: virtual [%x,%x)", excluded.virtual, excluded.virtual+excluded.length)

	return vSize - excluded.length, []specialVirtualRegion{{region: excluded}}
}
6 changes: 6 additions & 0 deletions pkg/sentry/platform/kvm/physical_map_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,10 @@ package kvm

const (
// reservedMemory is the size of the physical memory chunk reserved at
// the start of the physical address space; nothing is reserved on arm64.
reservedMemory = 0
// extendedAddressSpaceAllowed is always false on arm64 because 5-level
// page tables are not implemented there.
extendedAddressSpaceAllowed = false
)

// archSpecialRegions returns the architecture-specific regions excluded from
// the virtual address space along with the adjusted virtual size. On arm64
// there are none, so vSize is returned unchanged with a nil region list and
// maxUserAddr is ignored.
func archSpecialRegions(vSize, maxUserAddr uintptr) (uintptr, []specialVirtualRegion) {
	return vSize, nil
}
Loading