1、基础知识
我们都知道Linux内存组织管理结构架构,顶层是struct pglist_data
,然后再到struct zone
,最后是struct page
。大概的管理结构是这样的:
根据物理内存的地址范围可划分不同的zone,每个zone里的内存由buddy系统所管理,buddy系统管理着不同order大小的链表,在每个不同大小order链表的内部,又根据migrate_type类型进行分类保存。
2、migrate_type作用
为了更好的管理物理内存,操作系统进一步抽象出页块的概念,通常一个页块的大小是2^(MAX_ORDER-1)个页面(4MB)。每个页块对应一个迁移类型migrate_type
,buddy系统中的页面,根据其所在migrate_type
链表,可知道该页是属于哪个migrate_type
的页块。
问: 为什么要抽象出页块,并给页块指定迁移类型呢?
答: 因为要实现页面规整功能。buddy系统中的页面不断被线程申请使用,外部碎片化会越来越严重,很容易就无法分配出连续的大order页面;而且我们也无法进行页面规整,因为我们不知道已分配出去的页面能否通过将数据迁移到其他页面来回收。但是有了迁移类型之后,我们就可以知道已分配出去的页面属于什么迁移类型、是否支持迁移回收。
例如:当buddy系统中存留page0、page2、page3,page1已经被分配出去,但是page1的所属页块的迁移类型是MIGRATE_MOVABLE
,如果我们想用page0-3满足作为order2的分配请求,我们完全可以将page1的数据迁移到page5上,同时再将page1上的映射关系也转移到page5上,这样page1就可以回收回来,与其他page形成order2的页面,满足order2的分配请求。
3、页块的迁移类型存储
我们上面了解到每个页块对应一个迁移类型,这个迁移类型是在哪里存储的呢?另外,如何通过pfn找到对应的页块,进而获取到迁移类型呢?
先明确两个特点:
1、大部分物理内存页面一开始存放在MIGRATE_MOVABLE
链表中
2、大部分物理内存页面初始化时存放在order为10的链表中
当我们要分配MIGRATE_UNMOVABLE
的页面、而对应的空闲链表中又没有可用页面时,会fallback到MIGRATE_MOVABLE
,并在满足一定条件时将整个页块的迁移类型改变为MIGRATE_UNMOVABLE
。
start_kernel()
-> setup_arch()
--> bootmem_init()
---> zone_sizes_init()
----> free_area_init_node()
-----> free_area_init_core()
/*
* Set up the zone data structures:
* - mark all pages reserved
* - mark all memory queues empty
* - clear the memory bitmaps
*
* NOTE: pgdat should get zeroed by caller.
* NOTE: this function is only called during early init.
*/
static void __init free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
int nid = pgdat->node_id;
pgdat_init_internals(pgdat);
pgdat->per_cpu_nodestats = &boot_nodestats;
for (j = 0; j < MAX_NR_ZONES; j++) { // visit every zone slot of this pglist_data
struct zone *zone = pgdat->node_zones + j;
unsigned long size, freesize, memmap_pages;
unsigned long zone_start_pfn = zone->zone_start_pfn;
...
set_pageblock_order(); // establish the pageblock size (pageblock_order)
setup_usemap(pgdat, zone, zone_start_pfn, size); // allocate the storage for this zone's per-pageblock migrate-type bits
init_currently_empty_zone(zone, zone_start_pfn, size);
memmap_init(size, nid, j, zone_start_pfn); // initialise the pages covered by this zone
}
}
3.1 首先来看一下set_pageblock_order()
/*
 * Initialise the number of pages represented by NR_PAGEBLOCK_BITS.
 *
 * Picks pageblock_order exactly once: HUGETLB_PAGE_ORDER when
 * HPAGE_SHIFT > PAGE_SHIFT, otherwise MAX_ORDER - 1.
 */
void __init set_pageblock_order(void)
{
	unsigned int chosen;

	/* A non-zero pageblock_order means it has already been set up. */
	if (pageblock_order != 0)
		return;

	/*
	 * Assume the largest contiguous order of interest is a huge page.
	 * This value may be variable depending on boot parameters on IA64 and
	 * powerpc.
	 */
	chosen = (HPAGE_SHIFT > PAGE_SHIFT) ? HUGETLB_PAGE_ORDER
					    : MAX_ORDER - 1;
	pageblock_order = chosen;
}
在没开启HUGETLB_PAGE
特性的情况下,pageblock_order就为MAX_ORDER-1,也就是10(MAX_ORDER为默认值11时)。
3.2 再来看一下setup_usemap()
zone->pageblock_flags
保存当前zone内所有页块的迁移类型信息:
/*
 * Set up zone->pageblock_flags for one zone.
 *
 * @pgdat:          node the zone belongs to (its node_id selects the
 *                  allocation node)
 * @zone:           zone whose pageblock_flags pointer is filled in
 * @zone_start_pfn: first pfn of the zone
 * @zonesize:       zone size, as passed on to usemap_size()
 *
 * pageblock_flags stays NULL when usemap_size() reports zero bytes.
 * Panics if the allocation fails.
 */
static void __ref setup_usemap(struct pglist_data *pgdat,
			       struct zone *zone,
			       unsigned long zone_start_pfn,
			       unsigned long zonesize)
{
	/* Bytes needed to hold the flag bits of every pageblock in the zone. */
	unsigned long nbytes = usemap_size(zone_start_pfn, zonesize);

	zone->pageblock_flags = NULL;
	if (nbytes == 0)
		return;

	/* Allocate the backing storage for the pageblock flag bits. */
	zone->pageblock_flags = memblock_alloc_node(nbytes, SMP_CACHE_BYTES,
						    pgdat->node_id);
	if (zone->pageblock_flags)
		return;

	panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
	      nbytes, zone->name, pgdat->node_id);
}
// pageblock_nr_pages is the number of pages contained in one pageblock
#define pageblock_nr_pages (1UL << pageblock_order)
/*
* Calculate the size of the zone->blockflags rounded to an unsigned long
* Start by making sure zonesize is a multiple of pageblock_order by rounding
* up. Then use 1 NR_PAGEBLOCK_BITS worth of bits per pageblock, finally
* round what is now in bits to nearest long in bits, then return it in
* bytes.
*/
static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
{
	unsigned long nr_pages, nr_blocks, nr_bits;

	/*
	 * Extend the size by the distance between the zone start and the
	 * pageblock boundary below it, then round up to whole pageblocks,
	 * so a partially covered pageblock at either end counts as a full
	 * one.
	 */
	nr_pages = zonesize + (zone_start_pfn & (pageblock_nr_pages - 1));
	nr_pages = roundup(nr_pages, pageblock_nr_pages);

	/* Number of pageblocks needed to cover the zone. */
	nr_blocks = nr_pages >> pageblock_order;

	/*
	 * Every pageblock takes NR_PAGEBLOCK_BITS bits; pad the total to a
	 * whole number of unsigned longs (8 bits per byte times the bytes in
	 * an unsigned long).
	 */
	nr_bits = roundup(nr_blocks * NR_PAGEBLOCK_BITS,
			  8 * sizeof(unsigned long));

	/* Convert bits to bytes: there are 8 bits in a byte. */
	return nr_bits / 8;
}
3.3 最后看一下memmap_init()
/*
 * Initialise the memmap of the pfn range [range_start_pfn,
 * range_start_pfn + size) of zone @zone on node @nid.
 *
 * Each memory pfn range of the node is clipped to that window, and every
 * non-empty piece is passed to memmap_init_zone() with MEMINIT_EARLY and
 * MIGRATE_MOVABLE.
 */
void __meminit __weak memmap_init(unsigned long size, int nid,
				  unsigned long zone,
				  unsigned long range_start_pfn)
{
	unsigned long range_end_pfn = range_start_pfn + size;
	unsigned long start_pfn, end_pfn;
	int i;

	/* Walk the memory pfn ranges reported for this node. */
	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
		unsigned long first = clamp(start_pfn, range_start_pfn,
					    range_end_pfn);
		unsigned long limit = clamp(end_pfn, range_start_pfn,
					    range_end_pfn);

		/* Nothing of this range falls inside the requested window. */
		if (limit <= first)
			continue;

		/* Initialise the piece; its pageblocks start as MIGRATE_MOVABLE. */
		memmap_init_zone(limit - first, nid, zone, first,
				 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
	}
}
/*
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
* done. Non-atomic initialization, single-pass.
*
* All aligned pageblocks are initialized to the specified migratetype
* (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
* zone stats (e.g., nr_isolate_pageblock) are touched.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn,
enum meminit_context context,
struct vmem_altmap *altmap, int migratetype)
{
unsigned long pfn, end_pfn = start_pfn + size;
struct page *page;
if (highest_memmap_pfn < end_pfn - 1)
highest_memmap_pfn = end_pfn - 1;
...
for (pfn = start_pfn; pfn < end_pfn; ) {
/*
* There can be holes in boot-time mem_map[]s handed to this
* function. They do not exist on hotplugged memory.
*/
if (context == MEMINIT_EARLY) {
if (overlap_memmap_init(zone, &pfn))
continue;
if (defer_init(nid, pfn, end_pfn))
break;
}
// Look up the struct page that corresponds to this pfn
page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);
if (context == MEMINIT_HOTPLUG)
__SetPageReserved(page);
/*
* Usually, we want to mark the pageblock MIGRATE_MOVABLE,
* such that unmovable allocations won't be scattered all
* over the place during system boot.
*/
// Only act on a pfn that is aligned to the number of pages in a pageblock
if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
// Record the caller-supplied migratetype for this pageblock
// (memmap_init() passes MIGRATE_MOVABLE)
set_pageblock_migratetype(page, migratetype);
cond_resched();
}
pfn++;
}
}
/*
 * Store a migrate type in the pageblock flags for @page.
 *
 * @page:        page within the pageblock of interest
 * @migratetype: requested migrate type
 *
 * When page_group_by_mobility_disabled is set and the requested type is
 * below MIGRATE_PCPTYPES, MIGRATE_UNMOVABLE is stored instead.
 */
void set_pageblock_migratetype(struct page *page, int migratetype)
{
	int effective = migratetype;

	if (unlikely(page_group_by_mobility_disabled &&
		     migratetype < MIGRATE_PCPTYPES))
		effective = MIGRATE_UNMOVABLE;

	set_pfnblock_flags_mask(page, (unsigned long)effective,
				page_to_pfn(page), MIGRATETYPE_MASK);
}
/* Return a pointer to the bitmap storing bits affecting a block of pages */
static inline unsigned long *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM // with CONFIG_SPARSEMEM, use the usemap of the section that pfn maps to
return section_to_usemap(__pfn_to_section(pfn));
#else // otherwise use the pageblock_flags of the zone that owns the page
return page_zone(page)->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
}
/*
 * Convert pfn into the index of the first flag bit of its pageblock, relative
 * to the start of the bitmap chosen by get_pageblock_bitmap().
 */
static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
// keep only the pfn bits below PAGES_PER_SECTION
pfn &= (PAGES_PER_SECTION-1);
#else
// make pfn relative to the zone start rounded down to a pageblock boundary
pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
#endif /* CONFIG_SPARSEMEM */
// pfn >> pageblock_order is the pageblock number; each pageblock uses
// NR_PAGEBLOCK_BITS bits, so the product is the bit position where this
// pageblock's flags begin
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}
/**
* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
* @page: The page within the block of interest
* @flags: The flags to set
* @pfn: The target page frame number
* @mask: mask of bits that the caller is interested in
*/
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
unsigned long *bitmap;
unsigned long bitidx, word_bitidx;
unsigned long old_word, word;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
// Locate the bitmap holding the pageblock flags for this page
// (see get_pageblock_bitmap())
bitmap = get_pageblock_bitmap(page, pfn);
// Bit position at which the flags of pfn's pageblock start
bitidx = pfn_to_bitidx(page, pfn);
// Split it into the index of the unsigned long word and the bit offset inside that word
word_bitidx = bitidx / BITS_PER_LONG;
bitidx &= (BITS_PER_LONG-1);
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
// Shift mask and value to the pageblock's position inside the word
mask <<= bitidx;
flags <<= bitidx;
// Store the flags: replace only the masked bits, and retry with the freshly
// observed word whenever cmpxchg reports the word changed underneath us
word = READ_ONCE(bitmap[word_bitidx]);
for (;;) {
old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
if (word == old_word)
break;
word = old_word;
}
}
关于migrate_type初步探索先到这里,感谢各位读者浏览!!!
欲知后事如何,请看下篇博文的分析。