首页 > 代码库 > 内存管理初始化源码5:free_area_init_nodes

内存管理初始化源码5:free_area_init_nodes

  start_kernel ——> setup_arch ——> arch_mem_init ——> |——> bootmem_init

                                 |——> device_tree_init

                                 |——> sparse_init

                                 |——> plat_swiotlb_setup

                                 |——> paging_init

  我们看看paging_init做了什么?!

/*
 * paging_init - initialise the page tables, then hand the per-zone PFN
 * limits to the generic node/zone setup code.
 *
 * Fills max_zone_pfns[] with the highest page frame number of every zone
 * that is configured in, and passes the array to free_area_init_nodes().
 *
 * Note: the kernel identifiers below (CONFIG_*, MAX_NR_ZONES, ZONE_*,
 * MAX_DMA*_PFN) are upper-case; a lower-cased copy neither matches the
 * Kconfig macros nor names any declared symbol.
 */
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        unsigned long lastpfn __maybe_unused;

        /* Page-table setup; not analysed further in this article. */
        pagetable_init();

#ifdef CONFIG_HIGHMEM
        kmap_init();
#endif
        kmap_coherent_init();

#ifdef CONFIG_ZONE_DMA
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
#endif
#ifdef CONFIG_ZONE_DMA32
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
        lastpfn = max_low_pfn;
#ifdef CONFIG_HIGHMEM
        max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
        lastpfn = highend_pfn;
#endif

        /*
         * 1. max_zone_pfns[] holds the highest PFN of each zone of the
         *    (UMA) memory node. Values observed on the author's board,
         *    where MAX_NR_ZONES == 3:
         *
         *      max_zone_pfns[0] = 131072      (0 -> ZONE_NORMAL)
         *      max_zone_pfns[1] = 262144      (1 -> ZONE_HIGHMEM)
         *      max_zone_pfns[2] = 2155610112  (2 -> ZONE_MOVABLE)
         *
         *    The ZONE_MOVABLE entry is never assigned above, so it holds
         *    a garbage value: ZONE_MOVABLE is a virtual zone whose PFNs
         *    have not been computed yet at this point.
         *
         *    max_zone_pfns[0] is 131072 (i.e. 512 MiB) although the low
         *    memory actually present on this system only covers PFNs
         *    0 - 57344: on MIPS everything below 512 MiB is treated as
         *    low memory by default.
         *
         * 2. free_area_init_nodes() turns these limits into the node and
         *    zone data structures.
         */
        free_area_init_nodes(max_zone_pfns);
}

 

初始化内存域和节点数据结构

    回忆上篇文章,我们设置了一个数组:early_node_map。到此为止,体系结构相关的代码已经提供了如下信息:

  ① 系统中各个内存域的页帧边界,保存在 max_zone_pfn 数组中。

  ② 各结点页帧的分配情况,保存在全局变量early_node_map中。

1. 管理数据结构的创建

  从内核2.6.10开始提供了一个通用的框架,用于将上述信息转换为伙伴系统预期的结点和内存域数据结构。在这以前,各个体系结构必须自行建立相关数据结构。现在体系结构只需建立简单结构,将繁重的工作交给free_area_init_nodes完成。

/* kernel/mm/page_alloc.c */

/**
 * free_area_init_nodes - Initialise all pg_data_t and zone data
 * @max_zone_pfn: an array of max PFNs for each zone
 *
 * This will call free_area_init_node() for each active node in the system.
 * Using the page ranges provided by add_active_range(), the size of each
 * zone in each node and their holes is calculated. If the maximum PFN
 * between two adjacent zones match, it is assumed that the zone is empty.
 * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
 * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
 * starts where the previous one ended. For example, ZONE_DMA32 starts
 * at arch_max_dma_pfn.
 *
 * In short: the size of every zone of every memory node is computed here,
 * and the holes inside them are accounted for as well.
 */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
	unsigned long nid;
	int i;

	/*
	 * Sort early_node_map as initialisation assumes it is sorted.
	 * (Author's note: the kernel ships a generic heap sort in
	 * lib/sort.c, which sort_node_map() is said to use.)
	 */
	sort_node_map();

	/* Record where the zone boundaries are */
	memset(arch_zone_lowest_possible_pfn, 0,
	       sizeof(arch_zone_lowest_possible_pfn));
	memset(arch_zone_highest_possible_pfn, 0,
	       sizeof(arch_zone_highest_possible_pfn));
	arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
	arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
	/*
	 * Observed on the author's board:
	 *   arch_zone_lowest_possible_pfn[0]  = 0
	 *   arch_zone_highest_possible_pfn[0] = 131072
	 */

	/* Every further zone starts where the previous one ended. */
	for (i = 1; i < MAX_NR_ZONES; i++) {
		if (i == ZONE_MOVABLE)
			continue;
		arch_zone_lowest_possible_pfn[i] =
			arch_zone_highest_possible_pfn[i-1];
		arch_zone_highest_possible_pfn[i] =
			max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
	}
	/*
	 * Observed on the author's board:
	 *   arch_zone_lowest_possible_pfn[1]  = 131072
	 *   arch_zone_highest_possible_pfn[1] = 262144
	 */

	/*
	 * ZONE_MOVABLE is a virtual zone that is not tied to a hardware
	 * memory range, so its boundaries are always set to 0 here.
	 * Author's note: the zone only exists when one of the kernelcore= /
	 * movablecore= command line parameters is given; it then starts at
	 * some PFN inside a particular zone of each node, and those start
	 * PFNs are what find_zone_movable_pfns_for_nodes() computes.
	 */
	arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
	arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
	find_zone_movable_pfns_for_nodes(zone_movable_pfn);

	/* Print out the zone ranges */
	printk("Zone PFN ranges:\n");
	for (i = 0; i < MAX_NR_ZONES; i++) {
		if (i == ZONE_MOVABLE)
			continue;
		printk(" %-8s ", zone_names[i]);
		if (arch_zone_lowest_possible_pfn[i] ==
				arch_zone_highest_possible_pfn[i])
			printk("empty\n");
		else
			printk("%0#10lx -> %0#10lx\n",
				arch_zone_lowest_possible_pfn[i],
				arch_zone_highest_possible_pfn[i]);
	}

	/* Print out the PFNs ZONE_MOVABLE begins at in each node */
	printk("Movable zone start PFN for each node\n");
	for (i = 0; i < MAX_NUMNODES; i++) {
		if (zone_movable_pfn[i])
			printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
	}

	/* Print out the early_node_map[] */
	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
	for (i = 0; i < nr_nodemap_entries; i++)
		printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
						early_node_map[i].start_pfn,
						early_node_map[i].end_pfn);
	/*
	 * Output on the author's board (decimal PFNs added in brackets):
	 *
	 *   Zone PFN ranges:
	 *     Normal   0x00000000 -> 0x00020000   [0 -> 131072]
	 *     HighMem  0x00020000 -> 0x00040000   [131072 -> 262144]
	 *   Movable zone start PFN for each node  [ZONE_MOVABLE not enabled]
	 *   early_node_map[2] active PFN ranges
	 *     0: 0x00000000 -> 0x0000e000         [0 -> 57344]
	 *     1: 0x00030000 -> 0x00040000         [196608 -> 262144]
	 */

	/* Initialise every node */
	mminit_verify_pageflags_layout();
	setup_nr_node_ids();

	/*
	 * Walk all online nodes and let free_area_init_node() build the
	 * data structures of each one. On a UMA system the loop body runs
	 * exactly once.
	 */
	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid);

		free_area_init_node(nid, NULL,
				find_min_pfn_for_node(nid), NULL);

		/*
		 * Any memory on that node: if the node has present pages,
		 * flag it as N_HIGH_MEMORY in the node state bitmap.
		 */
		if (pgdat->node_present_pages)
			node_set_state(nid, N_HIGH_MEMORY);

		/*
		 * Author's note: additionally checks whether a zone below
		 * ZONE_HIGHMEM holds memory and, if so, sets the
		 * N_NORMAL_MEMORY flag for the node.
		 */
		check_for_regular_memory(pgdat);
	}
}

 

/*
 * free_area_init_node - set up the pg_data_t of one memory node
 * @nid:            node id
 * @zones_size:     per-zone sizes, or NULL (the caller shown in this
 *                  article, free_area_init_nodes(), passes NULL)
 * @node_start_pfn: first page frame number of the node
 * @zholes_size:    per-zone hole sizes, or NULL (NULL from the same caller)
 *
 * Records the node id and start PFN, computes the node's page totals,
 * allocates the node's mem_map and finally initialises the zones through
 * free_area_init_core().
 */
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        unsigned long node_start_pfn, unsigned long *zholes_size)
{
    pg_data_t *pgdat = NODE_DATA(nid);

    pgdat->node_id = nid;                         /* observed: 0 */
    pgdat->node_start_pfn = node_start_pfn;       /* observed: 0 */
    calculate_node_totalpages(pgdat, zones_size, zholes_size);

    alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
    printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
        nid, (unsigned long)pgdat,
        (unsigned long)pgdat->node_mem_map);
    /*
     * Output on the author's board:
     * free_area_init_node: node 0, pgdat 807874e0, node_mem_map 81000000
     */
#endif

    /*
     * Debug output added for this walkthrough. Both arrays may be NULL
     * (free_area_init_nodes() passes NULL for each), so they must not be
     * dereferenced unconditionally, and the leading %d needs nid as its
     * argument.
     */
    if (zones_size && zholes_size)
        printk("%d : *zones_size = %lu, *zholes_size = %lu\n",
            nid, *zones_size, *zholes_size);

    free_area_init_core(pgdat, zones_size, zholes_size);
}

 

/*
 * calculate_node_totalpages - compute the page totals of a memory node
 * @pgdat:       node descriptor to update
 * @zones_size:  forwarded unchanged to zone_spanned_pages_in_node()
 * @zholes_size: forwarded unchanged to zone_absent_pages_in_node()
 *
 * Stores two counters in @pgdat:
 *   node_spanned_pages - sum of the spanned pages of all zones
 *                        (holes included)
 *   node_present_pages - the spanned total minus the absent pages of all
 *                        zones (holes removed)
 *
 * Values observed on the author's board:
 *   pgdat->node_spanned_pages = 262144
 *   pgdat->node_present_pages = 122880
 */
static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
        unsigned long *zones_size, unsigned long *zholes_size)
{
    unsigned long spanned = 0;
    unsigned long present;
    enum zone_type zt;

    /* First pass: add up what every zone spans. */
    zt = 0;
    while (zt < MAX_NR_ZONES) {
        spanned += zone_spanned_pages_in_node(pgdat->node_id, zt, zones_size);
        zt++;
    }
    pgdat->node_spanned_pages = spanned;

    /* Second pass: take the holes of every zone back out. */
    present = spanned;
    zt = 0;
    while (zt < MAX_NR_ZONES) {
        present -= zone_absent_pages_in_node(pgdat->node_id, zt, zholes_size);
        zt++;
    }
    pgdat->node_present_pages = present;

    printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                            present);
}
/*
 * alloc_node_mem_map - allocate the struct page array (mem_map) of a node
 * @pgdat: node descriptor; node_start_pfn and node_spanned_pages must
 *         already be set
 *
 * Does nothing for nodes that span no pages. With
 * CONFIG_FLAT_NODE_MEM_MAP, and unless node_mem_map is already set, it
 * allocates one struct page for every PFN in the MAX_ORDER-aligned range
 * covering the node and stores the address of the entry for
 * node_start_pfn in pgdat->node_mem_map. Without
 * CONFIG_NEED_MULTIPLE_NODES the global mem_map is pointed at node 0's
 * map.
 */
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
    /* Skip empty nodes (nodes that do not span any pages) */
    if (!pgdat->node_spanned_pages)
        return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP
    /* ia64 gets its own node_mem_map, before this, without bootmem */
    if (!pgdat->node_mem_map) {
        /*
         * Every physical page frame has a matching struct page;
         * node_mem_map will point at the struct page of the node's
         * first page frame.
         */
        unsigned long size, start, end;
        struct page *map;

        /*
         * The zone's endpoints aren't required to be MAX_ORDER
         * aligned but the node_mem_map endpoints must be in order
         * for the buddy allocator to function correctly.
         */
        start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
        end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
        end = ALIGN(end, MAX_ORDER_NR_PAGES);
        size =  (end - start) * sizeof(struct page);
        /*
         * Observed on the author's board:
         *   start = 0, end = 262144, size = 8388608
         * i.e. struct page instances are created for the holes too.
         */

        /*
         * Author's note: alloc_remap() returns NULL when the
         * architecture code has not set up a memory mapping; in that
         * case fall back to the bootmem allocator.
         */
        map = alloc_remap(pgdat->node_id, size);
        if (!map)
            map = alloc_bootmem_node_nopanic(pgdat, size);
        pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
    }
#ifndef CONFIG_NEED_MULTIPLE_NODES
    /*
     * With no DISCONTIG, the global mem_map is just set as node 0's
     */
    if (pgdat == NODE_DATA(0)) {
        mem_map = NODE_DATA(0)->node_mem_map;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
        if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
            mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
    }
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}

  函数 free_area_init_core 对 pgdat 中相关的数据结构进行初始化设置。

内存管理初始化源码5:free_area_init_nodes