memblock: updates for 6.11-rc1
* reserve_mem command line parameter to allow creation of named memory
  reservation at boot time. The driving use-case is to improve the ability
  of pstore to retain ramoops data across reboots.
* cleanups and small improvements in memblock and mm_init
* new test cases in memblock test suite

-----BEGIN PGP SIGNATURE-----

iQFEBAABCgAuFiEEeOVYVaWZL5900a/pOQOGJssO/ZEFAmaXfoIQHHJwcHRAa2Vy
bmVsLm9yZwAKCRA5A4Ymyw79kU5mCAC23vIrB8FRlORczMYj+V3VFss3OjKT92lS
fHGwq2oxHW+rdDpHXFObHU0D3k8d2l5jyrENRAAyA02qR0L6Pv8Na6pGxQua1eic
VIdw0PFQMsizD1AIj84Y6skkyyF/tvZHpmX0B12D5+Ur65DC/Z867Cm/lE33/fHv
/1+QB0JlG7W+FzxVstYyebY5/DVkH+bC7/A57FE2oB4BRXjEd8v9tTHBS4kRSvrE
zE2KFxeGajN749LHztIpIprPKehu8Gc3oLrYLNJO+uLFVCV8ey3OqVj0RXMG2wLl
hmVYqhbZM/Uz59D/P8pULD49f1Thjv/5A/MvUZ3SxM6zpWlsincf
=xrZd
-----END PGP SIGNATURE-----

Merge tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

Pull memblock updates from Mike Rapoport:

 - 'reserve_mem' command line parameter to allow creation of named memory
   reservation at boot time. The driving use-case is to improve the ability
   of pstore to retain ramoops data across reboots.

 - cleanups and small improvements in memblock and mm_init

 - new test cases in memblock test suite

* tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
  memblock tests: fix implicit declaration of function 'numa_valid_node'
  memblock: Move late alloc warning down to phys alloc
  pstore/ramoops: Add ramoops.mem_name= command line option
  mm/memblock: Add "reserve_mem" to reserved named memory at boot up
  mm/mm_init.c: don't initialize page->lru again
  mm/mm_init.c: not always search next deferred_init_pfn from very beginning
  mm/mm_init.c: use deferred_init_mem_pfn_range_in_zone() to decide loop condition
  mm/mm_init.c: get the highest zone directly
  mm/mm_init.c: move nr_initialised reset down a bit
  mm/memblock: fix a typo in description of for_each_mem_region()
  mm/mm_init.c: use memblock_region_memory_base_pfn() to get startpfn
  mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap
  mm/memblock: return true directly on finding overlap region
  memblock tests: add memblock_overlaps_region_checks
  mm/memblock: fix comment for memblock_isolate_range()
  memblock tests: add memblock_reserve_many_may_conflict_check()
  memblock tests: add memblock_reserve_all_locations_check()
  mm/memblock: remove empty dummy entry
commit b2fc97c186
@@ -5674,6 +5674,28 @@
                        them. If <base> is less than 0x10000, the region
                        is assumed to be I/O ports; otherwise it is memory.

        reserve_mem=    [RAM]
                        Format: nn[KMG]:<align>:<label>
                        Reserve physical memory and label it with a name that
                        other subsystems can use to access it. This is typically
                        used for systems that do not wipe the RAM, and this command
                        line will try to reserve the same physical memory on
                        soft reboots. Note, it is not guaranteed to be at the same
                        location, for example if anything about the system changes
                        or if a different kernel is booted. It can also fail if KASLR
                        places the kernel at the location where the RAM reservation
                        was on a previous boot; the new reservation will then be at a
                        different location.
                        Any subsystem using this feature must add a way to verify
                        that the contents of the physical memory are from a previous
                        boot, as there may be cases where the memory will not be
                        at the same location.

                        The format is size:align:label. For example, to request
                        12 megabytes with 4096-byte alignment for ramoops:

                        reserve_mem=12M:4096:oops ramoops.mem_name=oops

        reservetop=     [X86-32,EARLY]
                        Format: nn[KMG]
                        Reserves a hole at the top of the kernel virtual
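[Editorial note, not part of the merge: a minimal sketch of how a subsystem other than ramoops could consume such a named reservation. The "oops" label and the wrapper function are illustrative assumptions; reserve_mem_find_by_name() and its 1/0 return convention come from the include/linux/mm.h and mm/memblock.c hunks further down, and memremap() is the usual way to map plain RAM.]

    #include <linux/io.h>
    #include <linux/mm.h>

    /*
     * Hypothetical consumer: look up the region set up with
     * "reserve_mem=12M:4096:oops" and map it write-back.  Returns NULL
     * if the label was not created on this boot.
     */
    static void *oops_map_reserved(size_t *out_size)
    {
        phys_addr_t start, size;

        if (!reserve_mem_find_by_name("oops", &start, &size))
            return NULL;    /* label missing from the command line */

        *out_size = size;
        /* memremap(), not ioremap(): the reservation is ordinary RAM. */
        return memremap(start, size, MEMREMAP_WB);
    }

As the documentation above requires, the consumer must also verify that the contents really survived from the previous boot; a sketch of such a check follows the ramoops.rst hunk below.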
@@ -23,6 +23,8 @@ and type of the memory area are set using three variables:
* ``mem_size`` for the size. The memory size will be rounded down to a
  power of two.
* ``mem_type`` to specify the memory type (default is pgprot_writecombine).
* ``mem_name`` to specify a memory region defined by ``reserve_mem`` command
  line parameter.

Typically the default value of ``mem_type=0`` should be used as that sets the pstore
mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
@@ -118,6 +120,17 @@ Setting the ramoops parameters can be done in several different manners:
        return ret;
   }

D. Using a region of memory reserved via ``reserve_mem`` command line
   parameter. The address and size will be defined by the ``reserve_mem``
   parameter. Note that ``reserve_mem`` may not always allocate memory
   in the same location, and cannot be relied upon. Testing will need
   to be done, and it may not work on every machine, nor every kernel.
   Consider this a "best effort" approach. The ``reserve_mem`` option
   takes a size, alignment and name as arguments. The name is used
   to map the memory to a label that can be retrieved by ramoops.

        reserve_mem=2M:4096:oops ramoops.mem_name=oops

You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
very early in the architecture code, e.g.::
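[Editorial note, not part of the merge: the text above stresses that ``reserve_mem`` is best effort and that a consumer must be able to tell whether the region still holds data from the previous boot. A minimal sketch of such a check, with an assumed header layout; the magic value, struct and function names are illustrative only, and ramoops itself already validates its own record headers.]

    #include <linux/types.h>

    #define OOPS_REGION_MAGIC   0x52414d6fU     /* arbitrary marker, assumption */

    struct oops_region_hdr {
        u32 magic;          /* written when the region is first used */
        u32 generation;     /* bumped by the owner on every boot */
    };

    /*
     * True only if the mapped region still starts with the header written
     * on a previous boot; otherwise the caller must treat it as brand new.
     */
    static bool oops_region_valid(const void *base, size_t size)
    {
        const struct oops_region_hdr *hdr = base;

        return size >= sizeof(*hdr) && hdr->magic == OOPS_REGION_MAGIC;
    }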
@@ -50,6 +50,10 @@ module_param_hw(mem_address, ullong, other, 0400);
MODULE_PARM_DESC(mem_address,
        "start of reserved RAM used to store oops/panic logs");

static char *mem_name;
module_param_named(mem_name, mem_name, charp, 0400);
MODULE_PARM_DESC(mem_name, "name of kernel param that holds addr");

static ulong mem_size;
module_param(mem_size, ulong, 0400);
MODULE_PARM_DESC(mem_size,
@@ -914,6 +918,16 @@ static void __init ramoops_register_dummy(void)
{
    struct ramoops_platform_data pdata;

    if (mem_name) {
        phys_addr_t start;
        phys_addr_t size;

        if (reserve_mem_find_by_name(mem_name, &start, &size)) {
            mem_address = start;
            mem_size = size;
        }
    }

    /*
     * Prepare a dummy platform data structure to carry the module
     * parameters. If mem_size isn't set, then there are no module
@@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
                  unsigned long *out_spfn,
                  unsigned long *out_epfn);
/**
 * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
 * memblock areas
 * @i: u64 used as loop variable
 * @zone: zone in which all of the memory blocks reside
 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
 *
 * Walks over free (memory && !reserved) areas of memblock in a specific
 * zone. Available once memblock and an empty zone is initialized. The main
 * assumption is that the zone start, end, and pgdat have been associated.
 * This way we can use the zone to determine NUMA node, and if a given part
 * of the memblock is valid for the zone.
 */
#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
    for (i = 0, \
         __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
         i != U64_MAX; \
         __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))

/**
 * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
@@ -565,7 +546,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
}

/**
 * for_each_mem_region - itereate over memory regions
 * for_each_mem_region - iterate over memory regions
 * @region: loop variable
 */
#define for_each_mem_region(region) \

@@ -4261,4 +4261,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
void vma_pgtable_walk_begin(struct vm_area_struct *vma);
void vma_pgtable_walk_end(struct vm_area_struct *vma);

int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);

#endif /* _LINUX_MM_H */
mm/memblock.c
@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS

struct memblock memblock __initdata_memblock = {
    .memory.regions = memblock_memory_init_regions,
    .memory.cnt = 1, /* empty dummy entry */
    .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
    .memory.name = "memory",

    .reserved.regions = memblock_reserved_init_regions,
    .reserved.cnt = 1, /* empty dummy entry */
    .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
    .reserved.name = "reserved",

@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
    .regions = memblock_physmem_init_regions,
    .cnt = 1, /* empty dummy entry */
    .max = INIT_PHYSMEM_REGIONS,
    .name = "physmem",
};
@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
    for (i = 0; i < type->cnt; i++)
        if (memblock_addrs_overlap(base, size, type->regions[i].base,
                       type->regions[i].size))
            break;
    return i < type->cnt;
            return true;
    return false;
}

/**
@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
    /* Special case for empty arrays */
    if (type->cnt == 0) {
        WARN_ON(type->total_size != 0);
        type->cnt = 1;
        type->regions[0].base = 0;
        type->regions[0].size = 0;
        type->regions[0].flags = 0;
@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,

    /* special case for empty array */
    if (type->regions[0].size == 0) {
        WARN_ON(type->cnt != 1 || type->total_size);
        WARN_ON(type->cnt != 0 || type->total_size);
        type->regions[0].base = base;
        type->regions[0].size = size;
        type->regions[0].flags = flags;
        memblock_set_region_node(&type->regions[0], nid);
        type->total_size = size;
        type->cnt = 1;
        return 0;
    }

@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base, @base + @size). Crossing regions are split at the boundaries,
 * which may create at most two more regions. The index of the first
 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
 * region inside the range is returned in *@start_rgn and the index of the
 * first region after the range is returned in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
    enum memblock_flags flags = choose_memblock_flags();
    phys_addr_t found;

    /*
     * Detect any accidental use of these APIs after slab is ready, as at
     * this moment memblock may be deinitialized already and its
     * internal data may be destroyed (after execution of memblock_free_all)
     */
    if (WARN_ON_ONCE(slab_is_available())) {
        void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);

        return vaddr ? virt_to_phys(vaddr) : 0;
    }

    if (!align) {
        /* Can't use WARNs this early in boot on powerpc */
        dump_stack();
@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
{
    phys_addr_t alloc;

    /*
     * Detect any accidental use of these APIs after slab is ready, as at
     * this moment memblock may be deinitialized already and its
     * internal data may be destroyed (after execution of memblock_free_all)
     */
    if (WARN_ON_ONCE(slab_is_available()))
        return kzalloc_node(size, GFP_NOWAIT, nid);

    if (max_addr > memblock.current_limit)
        max_addr = memblock.current_limit;
@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
     * downwards.
     */
    pg = PAGE_ALIGN(__pa(start_pg));
    pgend = __pa(end_pg) & PAGE_MASK;
    pgend = PAGE_ALIGN_DOWN(__pa(end_pg));

    /*
     * If there are free pages between these, free the section of the
@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
    totalram_pages_add(pages);
}

/* Keep a table to reserve named memory */
#define RESERVE_MEM_MAX_ENTRIES 8
#define RESERVE_MEM_NAME_SIZE 16
struct reserve_mem_table {
    char name[RESERVE_MEM_NAME_SIZE];
    phys_addr_t start;
    phys_addr_t size;
};
static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
static int reserved_mem_count;

/* Add wildcard region with a lookup name */
static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
                   const char *name)
{
    struct reserve_mem_table *map;

    map = &reserved_mem_table[reserved_mem_count++];
    map->start = start;
    map->size = size;
    strscpy(map->name, name);
}

/**
 * reserve_mem_find_by_name - Find reserved memory region with a given name
 * @name: The name that is attached to a reserved memory region
 * @start: If found, holds the start address
 * @size: If found, holds the size of the address.
 *
 * @start and @size are only updated if @name is found.
 *
 * Returns: 1 if found or 0 if not found.
 */
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
{
    struct reserve_mem_table *map;
    int i;

    for (i = 0; i < reserved_mem_count; i++) {
        map = &reserved_mem_table[i];
        if (!map->size)
            continue;
        if (strcmp(name, map->name) == 0) {
            *start = map->start;
            *size = map->size;
            return 1;
        }
    }
    return 0;
}
EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);

/*
 * Parse reserve_mem=nn:align:name
 */
static int __init reserve_mem(char *p)
{
    phys_addr_t start, size, align, tmp;
    char *name;
    char *oldp;
    int len;

    if (!p)
        return -EINVAL;

    /* Check if there's room for more reserved memory */
    if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
        return -EBUSY;

    oldp = p;
    size = memparse(p, &p);
    if (!size || p == oldp)
        return -EINVAL;

    if (*p != ':')
        return -EINVAL;

    align = memparse(p+1, &p);
    if (*p != ':')
        return -EINVAL;

    /*
     * memblock_phys_alloc() doesn't like a zero size align,
     * but it is OK for this command to have it.
     */
    if (align < SMP_CACHE_BYTES)
        align = SMP_CACHE_BYTES;

    name = p + 1;
    len = strlen(name);

    /* name needs to have length but not too big */
    if (!len || len >= RESERVE_MEM_NAME_SIZE)
        return -EINVAL;

    /* Make sure that name has text */
    for (p = name; *p; p++) {
        if (!isspace(*p))
            break;
    }
    if (!*p)
        return -EINVAL;

    /* Make sure the name is not already used */
    if (reserve_mem_find_by_name(name, &start, &tmp))
        return -EBUSY;

    start = memblock_phys_alloc(size, align);
    if (!start)
        return -ENOMEM;

    reserved_mem_add(start, size, name);

    return 1;
}
__setup("reserve_mem=", reserve_mem);

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
    [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
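[Editorial note, not part of the merge: the memblock_alloc_range_nid() hunk above moves the late-allocation check down into the physical allocator, so a memblock allocation issued after slab is up no longer touches possibly discarded memblock data; it warns once and transparently returns slab-backed memory. A sketch of what such a hypothetical, and normally unwanted, late call site now sees; memblock_phys_alloc() is the existing wrapper that ends up in memblock_alloc_range_nid().]

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/memblock.h>
    #include <linux/sizes.h>

    /*
     * Hypothetical late caller: by this point memblock_free_all() has run,
     * so the WARN_ON_ONCE(slab_is_available()) added above fires and the
     * returned address is virt_to_phys() of a kzalloc_node() buffer
     * (or 0 on failure) rather than a fresh memblock range.
     */
    static int __init late_memblock_alloc_example(void)
    {
        phys_addr_t pa = memblock_phys_alloc(SZ_4K, SZ_4K);

        return pa ? 0 : -ENOMEM;
    }
    late_initcall(late_memblock_alloc_example);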
mm/mm_init.c
@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)

        nid = memblock_get_region_node(r);

        usable_startpfn = PFN_DOWN(r->base);
        usable_startpfn = memblock_region_memory_base_pfn(r);
        zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
            min(usable_startpfn, zone_movable_pfn[nid]) :
            usable_startpfn;
@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)

    if (early_page_ext_enabled())
        return false;

    /* Always populate low zones for address-constrained allocations */
    if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
        return false;

    if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
        return true;

    /*
     * prev_end_pfn static that contains the end of previous zone
     * No need to protect because called very early in boot before smp_init.
@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
        nr_initialised = 0;
    }

    /* Always populate low zones for address-constrained allocations */
    if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
        return false;

    if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
        return true;
    /*
     * We start only with one section of pages, more pages are added as
     * needed until the rest of deferred pages are initialized.
@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,

        init_reserved_page(start_pfn, nid);

        /* Avoid false-positive PageTail() */
        INIT_LIST_HEAD(&page->lru);

        /*
         * no need for atomic set_bit because the struct
         * page is not visible yet so nobody should
@@ -2019,24 +2018,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}

/*
 * This function is meant to pre-load the iterator for the zone init.
 * Specifically it walks through the ranges until we are caught up to the
 * first_init_pfn value and exits there. If we never encounter the value we
 * return false indicating there are no valid ranges left.
 * This function is meant to pre-load the iterator for the zone init from
 * a given point.
 * Specifically it walks through the ranges starting with initial index
 * passed to it until we are caught up to the first_init_pfn value and
 * exits there. If we never encounter the value we return false indicating
 * there are no valid ranges left.
 */
static bool __init
deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
                    unsigned long *spfn, unsigned long *epfn,
                    unsigned long first_init_pfn)
{
    u64 j;
    u64 j = *i;

    if (j == 0)
        __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);

    /*
     * Start out by walking through the ranges in this zone that have
     * already been initialized. We don't need to do anything with them
     * so we just need to flush them out of the system.
     */
    for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
    for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
        if (*epfn <= first_init_pfn)
            continue;
        if (*spfn < first_init_pfn)
@@ -2108,7 +2112,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
{
    unsigned long spfn, epfn;
    struct zone *zone = arg;
    u64 i;
    u64 i = 0;

    deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);

@@ -2138,8 +2142,8 @@ static int __init deferred_init_memmap(void *data)
    unsigned long first_init_pfn, flags;
    unsigned long start = jiffies;
    struct zone *zone;
    int zid, max_threads;
    u64 i;
    int max_threads;
    u64 i = 0;

    /* Bind memory initialisation thread to a local node if possible */
    if (!cpumask_empty(cpumask))
@@ -2165,27 +2169,18 @@ static int __init deferred_init_memmap(void *data)
     */
    pgdat_resize_unlock(pgdat, &flags);

    /* Only the highest zone is deferred so find it */
    for (zid = 0; zid < MAX_NR_ZONES; zid++) {
        zone = pgdat->node_zones + zid;
        if (first_init_pfn < zone_end_pfn(zone))
            break;
    }

    /* If the zone is empty somebody else may have cleared out the zone */
    if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
                         first_init_pfn))
        goto zone_empty;
    /* Only the highest zone is deferred */
    zone = pgdat->node_zones + pgdat->nr_zones - 1;

    max_threads = deferred_page_init_max_threads(cpumask);

    while (spfn < epfn) {
        unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
    while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
        first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
        struct padata_mt_job job = {
            .thread_fn = deferred_init_memmap_chunk,
            .fn_arg = zone,
            .start = spfn,
            .size = epfn_align - spfn,
            .size = first_init_pfn - spfn,
            .align = PAGES_PER_SECTION,
            .min_chunk = PAGES_PER_SECTION,
            .max_threads = max_threads,
@@ -2193,12 +2188,10 @@ static int __init deferred_init_memmap(void *data)
        };

        padata_do_multithreaded(&job);
        deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
                            epfn_align);
    }
zone_empty:

    /* Sanity check that the next zone really is unpopulated */
    WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
    WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));

    pr_info("node %d deferred pages initialised in %ums\n",
        pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -2225,7 +2218,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
    unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
    unsigned long spfn, epfn, flags;
    unsigned long nr_pages = 0;
    u64 i;
    u64 i = 0;

    /* Only the last zone may have deferred pages */
    if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -12,6 +12,7 @@
#define PHYS_ADDR_MAX (~(phys_addr_t)0)

#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE)

#define __va(x) ((void *)((unsigned long)(x)))
#define __pa(x) ((unsigned long)(x))
@@ -13,4 +13,9 @@

#define NUMA_NO_NODE (-1)

static inline bool numa_valid_node(int nid)
{
    return nid >= 0 && nid < MAX_NUMNODES;
}

#endif /* _LINUX_NUMA_H */
@@ -15,12 +15,12 @@ static int memblock_initialization_check(void)
    PREFIX_PUSH();

    ASSERT_NE(memblock.memory.regions, NULL);
    ASSERT_EQ(memblock.memory.cnt, 1);
    ASSERT_EQ(memblock.memory.cnt, 0);
    ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
    ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0);

    ASSERT_NE(memblock.reserved.regions, NULL);
    ASSERT_EQ(memblock.reserved.cnt, 1);
    ASSERT_EQ(memblock.reserved.cnt, 0);
    ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
    ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0);

@@ -982,6 +982,262 @@ static int memblock_reserve_many_check(void)
    return 0;
}


/*
 * A test that trying to reserve the 129th memory block at all locations.
 * Expect to trigger memblock_double_array() to double the
 * memblock.memory.max, find a new valid memory as reserved.regions.
 *
 * 0 1 2 128
 * +-------+ +-------+ +-------+ +-------+
 * | 32K | | 32K | | 32K | ... | 32K |
 * +-------+-------+-------+-------+-------+ +-------+
 * |<-32K->| |<-32K->|
 *
 */
/* Keep the gap so these memory region will not be merged. */
#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx))
static int memblock_reserve_all_locations_check(void)
{
    int i, skip;
    void *orig_region;
    struct region r = {
        .base = SZ_16K,
        .size = SZ_16K,
    };
    phys_addr_t new_reserved_regions_size;

    PREFIX_PUSH();

    /* Reserve the 129th memory block for all possible positions*/
    for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) {
        reset_memblock_regions();
        memblock_allow_resize();

        /* Add a valid memory region used by double_array(). */
        dummy_physical_memory_init();
        memblock_add(dummy_physical_memory_base(), MEM_SIZE);

        for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) {
            if (i == skip)
                continue;

            /* Reserve some fakes memory region to fulfill the memblock. */
            memblock_reserve(MEMORY_BASE(i), MEM_SIZE);

            if (i < skip) {
                ASSERT_EQ(memblock.reserved.cnt, i + 1);
                ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
            } else {
                ASSERT_EQ(memblock.reserved.cnt, i);
                ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
            }
        }

        orig_region = memblock.reserved.regions;

        /* This reserve the 129 memory_region, and makes it double array. */
        memblock_reserve(MEMORY_BASE(skip), MEM_SIZE);

        /*
         * This is the memory region size used by the doubled reserved.regions,
         * and it has been reserved due to it has been used. The size is used to
         * calculate the total_size that the memblock.reserved have now.
         */
        new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
                        sizeof(struct memblock_region));
        /*
         * The double_array() will find a free memory region as the new
         * reserved.regions, and the used memory region will be reserved, so
         * there will be one more region exist in the reserved memblock. And the
         * one more reserved region's size is new_reserved_regions_size.
         */
        ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
        ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
                            new_reserved_regions_size);
        ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);

        /*
         * Now memblock_double_array() works fine. Let's check after the
         * double_array(), the memblock_reserve() still works as normal.
         */
        memblock_reserve(r.base, r.size);
        ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
        ASSERT_EQ(memblock.reserved.regions[0].size, r.size);

        ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
        ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
                            new_reserved_regions_size +
                            r.size);
        ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);

        dummy_physical_memory_cleanup();

        /*
         * The current reserved.regions is occupying a range of memory that
         * allocated from dummy_physical_memory_init(). After free the memory,
         * we must not use it. So restore the origin memory region to make sure
         * the tests can run as normal and not affected by the double array.
         */
        memblock.reserved.regions = orig_region;
        memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
    }

    test_pass_pop();

    return 0;
}

/*
 * A test that trying to reserve the 129th memory block at all locations.
 * Expect to trigger memblock_double_array() to double the
 * memblock.memory.max, find a new valid memory as reserved.regions. And make
 * sure it doesn't conflict with the range we want to reserve.
 *
 * For example, we have 128 regions in reserved and now want to reserve
 * the skipped one. Since reserved is full, memblock_double_array() would find
 * an available range in memory for the new array. We intended to put two
 * ranges in memory with one is the exact range of the skipped one. Before
 * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling
 * reserved array"), the new array would sits in the skipped range which is a
 * conflict. The expected new array should be allocated from memory.regions[0].
 *
 * 0 1
 * memory +-------+ +-------+
 * | 32K | | 32K |
 * +-------+ ------+-------+-------+-------+
 * |<-32K->|<-32K->|<-32K->|
 *
 * 0 skipped 127
 * reserved +-------+ ......... +-------+
 * | 32K | . 32K . ... | 32K |
 * +-------+-------+-------+ +-------+
 * |<-32K->|
 * ^
 * |
 * |
 * skipped one
 */
/* Keep the gap so these memory region will not be merged. */
#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx))
static int memblock_reserve_many_may_conflict_check(void)
{
    int i, skip;
    void *orig_region;
    struct region r = {
        .base = SZ_16K,
        .size = SZ_16K,
    };
    phys_addr_t new_reserved_regions_size;

    /*
     * 0 1 129
     * +---+ +---+ +---+
     * |32K| |32K| .. |32K|
     * +---+ +---+ +---+
     *
     * Pre-allocate the range for 129 memory block + one range for double
     * memblock.reserved.regions at idx 0.
     */
    dummy_physical_memory_init();
    phys_addr_t memory_base = dummy_physical_memory_base();
    phys_addr_t offset = PAGE_ALIGN(memory_base);

    PREFIX_PUSH();

    /* Reserve the 129th memory block for all possible positions*/
    for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) {
        reset_memblock_regions();
        memblock_allow_resize();

        reset_memblock_attributes();
        /* Add a valid memory region used by double_array(). */
        memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE);
        /*
         * Add a memory region which will be reserved as 129th memory
         * region. This is not expected to be used by double_array().
         */
        memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);

        for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) {
            if (i == skip)
                continue;

            /* Reserve some fakes memory region to fulfill the memblock. */
            memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE);

            if (i < skip) {
                ASSERT_EQ(memblock.reserved.cnt, i);
                ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
            } else {
                ASSERT_EQ(memblock.reserved.cnt, i - 1);
                ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE);
            }
        }

        orig_region = memblock.reserved.regions;

        /* This reserve the 129 memory_region, and makes it double array. */
        memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);

        /*
         * This is the memory region size used by the doubled reserved.regions,
         * and it has been reserved due to it has been used. The size is used to
         * calculate the total_size that the memblock.reserved have now.
         */
        new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
                        sizeof(struct memblock_region));
        /*
         * The double_array() will find a free memory region as the new
         * reserved.regions, and the used memory region will be reserved, so
         * there will be one more region exist in the reserved memblock. And the
         * one more reserved region's size is new_reserved_regions_size.
         */
        ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
        ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
                            new_reserved_regions_size);
        ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);

        /*
         * The first reserved region is allocated for double array
         * with the size of new_reserved_regions_size and the base to be
         * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size
         */
        ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size,
              MEMORY_BASE_OFFSET(0, offset) + SZ_32K);
        ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size);

        /*
         * Now memblock_double_array() works fine. Let's check after the
         * double_array(), the memblock_reserve() still works as normal.
         */
        memblock_reserve(r.base, r.size);
        ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
        ASSERT_EQ(memblock.reserved.regions[0].size, r.size);

        ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
        ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
                            new_reserved_regions_size +
                            r.size);
        ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);

        /*
         * The current reserved.regions is occupying a range of memory that
         * allocated from dummy_physical_memory_init(). After free the memory,
         * we must not use it. So restore the origin memory region to make sure
         * the tests can run as normal and not affected by the double array.
         */
        memblock.reserved.regions = orig_region;
        memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
    }

    dummy_physical_memory_cleanup();

    test_pass_pop();

    return 0;
}

static int memblock_reserve_checks(void)
{
    prefix_reset();
@@ -997,6 +1253,8 @@ static int memblock_reserve_checks(void)
    memblock_reserve_between_check();
    memblock_reserve_near_max_check();
    memblock_reserve_many_check();
    memblock_reserve_all_locations_check();
    memblock_reserve_many_may_conflict_check();

    prefix_pop();

@@ -1295,7 +1553,7 @@ static int memblock_remove_only_region_check(void)
    ASSERT_EQ(rgn->base, 0);
    ASSERT_EQ(rgn->size, 0);

    ASSERT_EQ(memblock.memory.cnt, 1);
    ASSERT_EQ(memblock.memory.cnt, 0);
    ASSERT_EQ(memblock.memory.total_size, 0);

    test_pass_pop();
@@ -1723,7 +1981,7 @@ static int memblock_free_only_region_check(void)
    ASSERT_EQ(rgn->base, 0);
    ASSERT_EQ(rgn->size, 0);

    ASSERT_EQ(memblock.reserved.cnt, 1);
    ASSERT_EQ(memblock.reserved.cnt, 0);
    ASSERT_EQ(memblock.reserved.total_size, 0);

    test_pass_pop();
@@ -2129,6 +2387,53 @@ static int memblock_trim_memory_checks(void)
    return 0;
}

static int memblock_overlaps_region_check(void)
{
    struct region r = {
        .base = SZ_1G,
        .size = SZ_4M
    };

    PREFIX_PUSH();

    reset_memblock_regions();
    memblock_add(r.base, r.size);

    /* Far Away */
    ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M));
    ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M));

    /* Neighbor */
    ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M));
    ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M));

    /* Partial Overlap */
    ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M));
    ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M));

    /* Totally Overlap */
    ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M));
    ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M));
    ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M));

    test_pass_pop();

    return 0;
}

static int memblock_overlaps_region_checks(void)
{
    prefix_reset();
    prefix_push("memblock_overlaps_region");
    test_print("Running memblock_overlaps_region tests...\n");

    memblock_overlaps_region_check();

    prefix_pop();

    return 0;
}

int memblock_basic_checks(void)
{
    memblock_initialization_check();
@@ -2138,6 +2443,7 @@ int memblock_basic_checks(void)
    memblock_free_checks();
    memblock_bottom_up_checks();
    memblock_trim_memory_checks();
    memblock_overlaps_region_checks();

    return 0;
}
@@ -40,13 +40,13 @@ void reset_memblock_regions(void)
{
    memset(memblock.memory.regions, 0,
           memblock.memory.cnt * sizeof(struct memblock_region));
    memblock.memory.cnt = 1;
    memblock.memory.cnt = 0;
    memblock.memory.max = INIT_MEMBLOCK_REGIONS;
    memblock.memory.total_size = 0;

    memset(memblock.reserved.regions, 0,
           memblock.reserved.cnt * sizeof(struct memblock_region));
    memblock.reserved.cnt = 1;
    memblock.reserved.cnt = 0;
    memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS;
    memblock.reserved.total_size = 0;
}
@@ -61,7 +61,7 @@ void reset_memblock_attributes(void)

static inline void fill_memblock(void)
{
    memset(memory_block.base, 1, MEM_SIZE);
    memset(memory_block.base, 1, PHYS_MEM_SIZE);
}

void setup_memblock(void)
@@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[])

void dummy_physical_memory_init(void)
{
    memory_block.base = malloc(MEM_SIZE);
    memory_block.base = malloc(PHYS_MEM_SIZE);
    assert(memory_block.base);
    fill_memblock();
}
@@ -12,6 +12,7 @@
#include <../selftests/kselftest.h>

#define MEM_SIZE SZ_32K
#define PHYS_MEM_SIZE SZ_16M
#define NUMA_NODES 8

#define INIT_MEMBLOCK_REGIONS 128
@@ -39,6 +40,9 @@ enum test_flags {
        assert((_expected) == (_seen)); \
    } while (0)

#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen)
#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen)

/**
 * ASSERT_NE():
 * Check the condition