Source file src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}
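// Illustrative note: the runtime's mmap wrapper returns the errno value as
// its second result instead of setting a C-style errno, which is why it is
// compared against the constants above. The rough user-space equivalent of
// this allocation is syscall.Mmap(-1, 0, int(n),
// syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANON|syscall.MAP_PRIVATE);
// a runnable sketch of that pattern appears after the end of this file.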

// adviseUnused is the madvise advice sysUnused passes to the kernel when
// releasing pages. It starts as MADV_FREE and is downgraded to MADV_DONTNEED
// below if the running kernel does not support MADV_FREE (added in Linux 4.5).
var adviseUnused = uint32(_MADV_FREE)

func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of madvise(adviseUnused)
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page into 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if physHugePageSize != 0 {
		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (physHugePageSize - 1)
		}
		if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (physHugePageSize - 1)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+physHugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	var advise uint32
	if debug.madvdontneed != 0 {
		advise = _MADV_DONTNEED
	} else {
		advise = atomic.Load(&adviseUnused)
	}
	if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
		// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
		// not supported.
		atomic.Store(&adviseUnused, _MADV_DONTNEED)
		madvise(v, n, _MADV_DONTNEED)
	}
}
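// Worked example (illustrative addresses): with physHugePageSize = 2MB
// (0x200000), v = 0x7f0000201000 and n = 0x3ff000, v is not huge-page
// aligned, so head = 0x7f0000200000, while v+n = 0x7f0000600000 is aligned,
// so tail stays 0 and only the huge page containing v is marked
// MADV_NOHUGEPAGE. If instead n = 0x200000, then v+n-1 = 0x7f0000400fff,
// tail = 0x7f0000400000 = head+physHugePageSize, and both huge pages are
// advised with a single 4MB madvise call.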

func sysUsed(v unsafe.Pointer, n uintptr) {
	// Partially undo the NOHUGEPAGE marks from sysUnused
	// for whole huge pages between v and v+n. This may
	// leave huge pages off at the end points v and v+n
	// even though allocations may cover these entire huge
	// pages. We could detect this and undo NOHUGEPAGE on
	// the end points as well, but it's probably not worth
	// the cost because when neighboring allocations are
	// freed sysUnused will just set NOHUGEPAGE again.
	sysHugePage(v, n)
}
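// Continuing the example above (illustrative): after sysUnused(v, 0x3ff000),
// a matching sysUsed(v, 0x3ff000) re-enables huge pages only on the one huge
// page wholly inside [v, v+n), namely [0x7f0000400000, 0x7f0000600000); the
// partially covered huge page at 0x7f0000200000 keeps MADV_NOHUGEPAGE, as the
// comment above explains.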

// sysHugePage re-enables transparent huge pages on the huge pages that lie
// entirely within [v, v+n).
func sysHugePage(v unsafe.Pointer, n uintptr) {
	if physHugePageSize != 0 {
		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (physHugePageSize - 1)) &^ (physHugePageSize - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (physHugePageSize - 1)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}
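// Rounding note (illustrative): (x + (p-1)) &^ (p-1) rounds x up to the next
// multiple of the power-of-two p, and x &^ (p-1) rounds x down. For example,
// with p = 0x200000, x = 0x201000 rounds up to 0x400000 and down to 0x200000,
// so beg and end above bracket exactly the huge pages fully inside [v, v+n).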

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

// sysFault remaps the region as inaccessible (PROT_NONE) so that any access
// to it faults.
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

// sysReserve asks the kernel for n bytes of address space at the hint v,
// mapped PROT_NONE, so the range only reserves address space and any access
// faults. It returns nil on failure.
func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		return nil
	}
	return p
}
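// Illustrative note: a PROT_NONE reservation like the one above is later
// committed by sysMap below, which remaps the same range in place with
// MAP_FIXED and PROT_READ|PROT_WRITE and throws if the kernel is out of
// memory or does not place the mapping at v.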

// sysMap commits a reserved range by remapping it readable and writable in
// place (MAP_FIXED). It throws if the kernel is out of memory or does not
// place the mapping at v.
func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		throw("runtime: cannot map pages in arena address space")
	}
}

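The same mmap/madvise pattern can be exercised from ordinary user code. Below is a minimal sketch (not part of the runtime; it uses package syscall directly and assumes Linux) that maps an anonymous private region, hints to the kernel that the pages are unused, and finally unmaps it:

package main

import (
	"fmt"
	"syscall"
)

func main() {
	const n = 1 << 20 // 1MB, a multiple of the physical page size

	// Anonymous, private, zero-filled mapping: the user-space analogue of
	// the runtime's sysAlloc above.
	mem, err := syscall.Mmap(-1, 0, n,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		panic(err)
	}

	mem[0] = 1 // touch a page so there is something to release

	// Tell the kernel the pages are unused, as sysUnused does. MADV_DONTNEED
	// discards the contents immediately; the runtime prefers MADV_FREE on
	// kernels that support it (Linux 4.5+) and falls back to MADV_DONTNEED.
	if err := syscall.Madvise(mem, syscall.MADV_DONTNEED); err != nil {
		panic(err)
	}

	// Return the address range to the kernel, as sysFree does.
	if err := syscall.Munmap(mem); err != nil {
		panic(err)
	}

	fmt.Println("mapped, advised, and unmapped", n, "bytes")
}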