线性地址空间的特征:

  • 进程的地址空间由允许进程使用的全部线性地址组成
  • 每个进程的线性地址空间是独立的
  • 内核通过线性区表示线性地址区间
  • 线性区由起始地址、长度和访问权限来描述
  • 线性区的长度和起始地址必须是4096的倍数(4K对齐)

进程获得新线性区的典型情况:

  • 创建新的进程,分配全新的地址空间
  • 正在运行的进程装入一个完全不同的程序(exec系列函数)
  • 正在运行的进程对一个文件执行内存映射(mmap)
  • 进程持续向用户态栈增加数据
  • 进程创建一个IPC共享线性区和其他进程共享内存
  • 进程通过malloc()之类的函数扩展动态内存区,最终是通过系统调用brk()或mmap()进行内存分配的

与创建、删除线性区相关的系统调用:

系统调用 说明
brk() 改变堆的大小
execve() 装入新的可执行文件
_exit() 结束进程,撤销地址空间
fork() 创建新进程,创建地址空间
mmap(), mmap2() 创建内存映射
mremap() 重新内存映射,修改线性区
remap_file_pages() 为文件创建非线性映射
munmap() 取消内存映射
shmat() 创建共享线性区
shmdt() 取消共享线性区

内存描述符

进程描述符中有两个指向内存描述符的指针,分别是mm指针和active_mm指针。

内存描述符struct mm_struct的定义如下:

// include/linux/sched.h
/*
 * Excerpt of the process descriptor showing only its two memory-descriptor
 * pointers; every other member is elided.
 */
struct task_struct
{
	/*...*/
	struct mm_struct		*mm;		/* NULL for kernel threads */
	/*
	 * Same pointer as mm for an ordinary process. For a kernel thread it
	 * is set, when the thread runs, to the active_mm of the previously
	 * running process.
	 */
	struct mm_struct		*active_mm;
	/*...*/
};	/* terminating semicolon is required after a struct definition */

/*
 * Memory descriptor: describes a process address space through its list and
 * red-black tree of linear regions (VMAs), its page global directory, its
 * reference counters and the boundaries of its regions.
 */
struct mm_struct {
	struct vm_area_struct * mmap;		/* list of VMAs, sorted by ascending address */
	struct rb_root mm_rb;               /* red-black tree root of VMAs */
	struct vm_area_struct * mmap_cache;	/* last find_vma result */
	unsigned long (*get_unmapped_area) (struct file *filp,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags);  /* method that searches for a valid address interval */
	void (*unmap_area) (struct vm_area_struct *area);       /* method that releases a linear region */
	unsigned long mmap_base;		/* address of the first anonymous region or file memory mapping */
	unsigned long free_area_cache;		/* the kernel starts searching for a free interval from this address */
	pgd_t * pgd;    // pointer to the page global directory
	/*
	 * All mm_users together count as a single reference in mm_count.
	 * When mm_users drops to 0, mm_count is decremented; when mm_count
	 * drops to 0, the mm_struct is freed.
	 */
	atomic_t mm_users;			/* How many users with user space? */
	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
	int map_count;				/* number of VMAs */
	struct rw_semaphore mmap_sem;
	spinlock_t page_table_lock;		/* Protects page tables, mm->rss, mm->anon_rss */

	struct list_head mmlist;		/* List of maybe swapped mm's.  These are globally strung
						 * together off init_mm.mmlist, and are protected
						 * by mmlist_lock
						 */

	/*
	 * Start and end addresses of the individual regions; the end_xxx
	 * values are past-the-end addresses.
	 */
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long start_brk, brk, start_stack;
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long rss, anon_rss, total_vm, locked_vm, shared_vm;
	unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;

	unsigned long saved_auxv[42]; /* for /proc/PID/auxv */

	unsigned dumpable:1;
	cpumask_t cpu_vm_mask;

	/* Architecture-specific MM context */
	mm_context_t context;

	/* Token based thrashing protection. */
	unsigned long swap_token_time;
	char recent_pagein;

	/* coredumping support */
	int core_waiters;
	struct completion *core_startup_done, core_done;

	/* aio bits */
	rwlock_t		ioctx_list_lock;
	struct kioctx		*ioctx_list;

	struct kioctx		default_kioctx;

	unsigned long hiwater_rss;	/* High-water RSS usage */
	unsigned long hiwater_vm;	/* High-water virtual memory usage */
};

其中有几个比较关键的字段,解释如下:

  • mmap是线性区struct vm_area_struct的链表(见下文),这个链表中的节点是按照内存地址的升序排列的
  • mm_rb是线性区组成的红黑树的根节点
  • mmap_cache是最近使用的struct vm_area_struct节点的缓存
  • start_xxx和end_xxx是各个线性区的起始和结束地址,其中end_xxx是线性区的past-the-end(逾尾)地址
  • mm_users和mm_count是两个计数器,mm_users表示共享该用户地址空间的使用者个数,mm_count表示该内存描述符的引用计数(所有mm_users使用者无论有几个,在mm_count中总共只算作1),当mm_users减为0时mm_count需要减1,当mm_count减为0的时候需要释放mm_struct

mm_struct

内核线程的内存描述符

  • 内核线程仅运行在内核态,不会访问低于TASK_SIZE的内存地址,不用线性区,所以mm_struct的很多字段对内核线程是没有意义的。

  • 每个进程描述符task_struct中包含两个内存描述符指针,mm字段和active_mm字段。对于普通进程,这两个字段存放相同的指针,但是内核线程的mm字段为NULL,当内核线程运行时,它的active_mm字段初始化为前一个运行进程的active_mm值。

线性区 VMA

Virtual memory area也称为线性区,用struct vm_area_struct描述,定义如下:

/*
 * Linear region (virtual memory area, VMA): one contiguous interval
 * [vm_start, vm_end) of a memory descriptor's address space, together with
 * its access permissions, flags and optional backing file.
 */
struct vm_area_struct {
	struct mm_struct * vm_mm;	/* The address space we belong to. */
	unsigned long vm_start;		/* Our start address within vm_mm. */
	unsigned long vm_end;		/* The first byte after our end address within vm_mm. */

	/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct *vm_next;

	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
	unsigned long vm_flags;		/* Flags, listed below. */

	/* Red-black tree linkage (presumably into the owning mm_struct's mm_rb tree). */
	struct rb_node vm_rb;

	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap prio tree, or
	 * linkage to the list of like vmas hanging off its node, or
	 * linkage of vma in the address_space->i_mmap_nonlinear list.
	 */
	union {
		struct {
			struct list_head list;
			void *parent;	/* aligns with prio_tree_node parent */
			struct vm_area_struct *head;
		} vm_set;

		struct raw_prio_tree_node prio_tree_node;
	} shared;

	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.  A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	struct vm_operations_struct * vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units, *not* PAGE_CACHE_SIZE */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */
	unsigned long vm_truncate_count;/* truncate_count or restart_addr */

#ifndef CONFIG_MMU
	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
};

// include/linux/mm.h
/*
 * Table of callbacks attached to a linear region through
 * vm_area_struct.vm_ops.
 */
struct vm_operations_struct {
	/* Called when the linear region is added to a process. */
	void (*open)(struct vm_area_struct * area);
	/* Called when the linear region is removed from a process. */
	void (*close)(struct vm_area_struct * area);
	/*
	 * Called by the page fault handler when a page that belongs to the
	 * region is accessed but is not present in memory.
	 */
	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
	/*
	 * Called to set up the page table entries for the region's linear
	 * addresses; mainly used for non-linear file memory mappings.
	 */
	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
#ifdef CONFIG_NUMA
	/* NUMA memory-policy accessors for the region (setter / getter, per their names). */
	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
					unsigned long addr);
#endif
};

其中有几个比较关键的字段,解释如下:

  • vm_mm指针指向该线性区所属的内存描述符
  • vm_start表示线性区的起始地址
  • vm_end表示线性区的结束地址,即线性区的past-the-end地址
  • vm_next指向线性区链表中的下一个线性区
  • vm_flags是线性区的权限标志
  • vm_rb是该线性区在红黑树中的节点
  • vm_file指向被映射的文件(可以为NULL)
  • vm_pgoff是在映射文件中的偏移量(以PAGE_SIZE为单位)
  • vm_ops是线性区的操作函数指针,包含了open、close、nopage、populate等函数
    • open将线性区添加到进程时调用
    • close将线性区从进程中删除时调用
    • nopage缺页(试图访问不存在的内存页,但是内存页属于线性区)时调用,由缺页异常处理程序调用
    • populate设置线性区的线性地址所对应的页表项时调用,主要用于非线性文件内存映射(这里还没看明白)

下面的图片形象地描述了线性区的结构: VMA

  • 进程的线性区不重叠,内核在分配新的线性区时会尝试将新分配的线性区和临近的线性区合并,如下图(a)所示
  • 如果不能合并,将创建新的线性区,如下图(b)所示
  • 内核在删除线性区时,如果删除的区域在线性区的末尾,内核会缩小线性区,如下图(c)所示
  • 如果删除的区域在线性区的中间,则会将一个线性区拆分成两个新的线性区,如下图(d)所示 vma3

线性区的处理

相关函数:

  • find_vma()查找给定地址的最邻近线性区
  • find_vma_intersection()查找一个与给定的地址区间相交的线性区
  • get_unmapped_area()查找空闲的区间
  • insert_vm_struct()插入线性区
  • do_mmap()分配线性地址区间
  • do_munmap()释放线性地址区间
  • split_vma()拆分线性区
  • unmap_region()遍历线性区链表并释放页框

缺页异常处理程序

// arch/i386/kernel/traps.c
// Registers the interrupt and exception handlers.
// Only the page-fault registration is shown; the rest of the body is elided.
void __init trap_init(void)
{
    // ...
    // Install page_fault as the handler for vector 14 (the i386 page-fault exception).
    set_intr_gate(14,&page_fault);
    // ...
}

// arch/i386/mm/fault.c
// Page fault handler (body elided in this excerpt).
// regs holds the register values at the time the exception occurred.
fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	// ...
}

参考