首页 > 代码库 > Linux内核架构读书笔记 - 2.5.3 处理优先级

Linux内核架构读书笔记 - 2.5.3 处理优先级

 

 1 优先级的内核表示

  内核使用 0 - 139 表示内部优先级,值越低,优先级越高. 其中 0 - 99 供实时进程使用; 普通进程的 nice 值 [-20, 19] 映射到范围 100 - 139,如下图

  

  内核定义了一系列宏来辅助优先级之间的转换

  sched.h

 1 /*
 2  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 3  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 4  * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 5  * values are inverted: lower p->prio value means higher priority.
 6  *
 7  * The MAX_USER_RT_PRIO value allows the actual maximum
 8  * RT priority to be separate from the value exported to
 9  * user-space.  This allows kernel threads to set their
10  * priority to a value higher than any user task. Note:
11  * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
12  */
13 
14 #define MAX_USER_RT_PRIO    100
15 #define MAX_RT_PRIO        MAX_USER_RT_PRIO
16 
17 #define MAX_PRIO        (MAX_RT_PRIO + 40)
18 #define DEFAULT_PRIO        (MAX_RT_PRIO + 20)

  sched.c

1 /*
2  * Convert user-nice values [ -20 ... 0 ... 19 ]
3  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
4  * and back.
5  */
6 #define NICE_TO_PRIO(nice)    (MAX_RT_PRIO + (nice) + 20)
7 #define PRIO_TO_NICE(prio)    ((prio) - MAX_RT_PRIO - 20)
8 #define TASK_NICE(p)        PRIO_TO_NICE((p)->static_prio)

  2 优先级计算

  动态优先级 task_struct->prio

  普通优先级 task_struct->normal_prio

  静态优先级   task_struct->static_prio (计算起点,已经设置好)

  sched.c

 1 /*
 2  * Calculate the current priority, i.e. the priority
 3  * taken into account by the scheduler. This value might
 4  * be boosted by RT tasks, or might be boosted by
 5  * interactivity modifiers. Will be RT if the task got
 6  * RT-boosted. If not then it returns p->normal_prio.
 7  */
 8 static int effective_prio(struct task_struct *p)
 9 {
10     p->normal_prio = normal_prio(p);
11     /*
12      * If we are RT tasks or we were boosted to RT priority,
13      * keep the priority unchanged. Otherwise, update priority
14      * to the normal priority:
15      */
16     if (!rt_prio(p->prio))
17         return p->normal_prio;
18     return p->prio;
19 }

 rt_prio 检测给定的优先级是否在实时范围中(即是否小于 MAX_RT_PRIO); 在 effective_prio 中传入的是动态优先级 p->prio

1 static inline int rt_prio(int prio)
2 {
3     if (unlikely(prio < MAX_RT_PRIO))
4         return 1;
5     return 0;
6 }

 普通优先级的计算区分普通进程和实时进程: 普通进程用 __normal_prio 计算; 实时进程则根据 rt_priority 计算. rt_priority 的值越大,表示实时优先级越高,而内核内部的优先级表示正好相反(值越小,优先级越高),因此内核用

 MAX_RT_PRIO-1 - p->rt_priority 计算
/*
 * __normal_prio - priority derived solely from the static priority.
 *
 * Returns p->static_prio unchanged; no other field of the task is
 * consulted.
 */
static inline int __normal_prio(struct task_struct *p)
{
    int prio = p->static_prio;

    return prio;
}

/*
 * normal_prio - compute the expected normal priority of a task, i.e.
 * the priority without taking RT-inheritance into account.
 *
 * Tasks for which task_has_rt_policy() is false use the value from
 * __normal_prio().  For tasks with an RT policy the result is
 * MAX_RT_PRIO-1 - p->rt_priority, so a larger rt_priority yields a
 * numerically smaller return value.
 */
static inline int normal_prio(struct task_struct *p)
{
    /* Non-RT policy: defer to the static-priority based helper. */
    if (!task_has_rt_policy(p))
        return __normal_prio(p);

    /* RT policy: invert rt_priority into the kernel's priority scale. */
    return MAX_RT_PRIO-1 - p->rt_priority;
}

  下图描述了不同类型上述计算结果

  

  注意以下两点:

  •   新建进程用 wake_up_new_task 唤醒时,或使用 nice 系统调用改变静态优先级时,使用上述方法(effective_prio)计算动态优先级 p->prio
  •   进程分支出子进程时,子进程的静态优先级继承自父进程,子进程的动态优先级(prio)设置为父进程的普通优先级.

 3 计算负载权重

  set_load_weight负责根据进程类型及静态优先级计算负载权重

  sched.h

1 struct load_weight {
2     unsigned long weight, inv_weight;
3 };

  一般来说,进程每降低一个 nice 值,多获得 10% 的 CPU 时间; 反之,每升高一个 nice 值,则少获得 10% 的 CPU 时间. 为了执行该策略,内核将优先级转换为权重,如下

 1 /*
 2  * Nice levels are multiplicative, with a gentle 10% change for every
 3  * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
 4  * nice 1, it will get ~10% less CPU time than another CPU-bound task
 5  * that remained on nice 0.
 6  *
 7  * The "10% effect" is relative and cumulative: from _any_ nice level,
 8  * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
 9  * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
10  * If a task goes up by ~10% and another task goes down by ~10% then
11  * the relative distance between them is ~25%.)
12  */
13 static const int prio_to_weight[40] = {
14  /* -20 */     88761,     71755,     56483,     46273,     36291,
15  /* -15 */     29154,     23254,     18705,     14949,     11916,
16  /* -10 */      9548,      7620,      6100,      4904,      3906,
17  /*  -5 */      3121,      2501,      1991,      1586,      1277,
18  /*   0 */      1024,       820,       655,       526,       423,
19  /*   5 */       335,       272,       215,       172,       137,
20  /*  10 */       110,        87,        70,        56,        45,
21  /*  15 */        36,        29,        23,        18,        15,
22 };

  具体转换代码如下: 实时进程的权重是 nice 值为 -20 的普通进程权重(prio_to_weight[0])的 2 倍,SCHED_IDLE 进程的权重则很小(WEIGHT_IDLEPRIO)

1 #define WEIGHT_IDLEPRIO                3
2 #define WMULT_IDLEPRIO         1431655765
 1 static void set_load_weight(struct task_struct *p)
 2 {
 3     if (task_has_rt_policy(p)) {
 4         p->se.load.weight = prio_to_weight[0] * 2;
 5         p->se.load.inv_weight = prio_to_wmult[0] >> 1;
 6         return;
 7     }
 8 
 9     /*
10      * SCHED_IDLE tasks get minimal weight:
11      */
12     if (p->policy == SCHED_IDLE) {
13         p->se.load.weight = WEIGHT_IDLEPRIO;
14         p->se.load.inv_weight = WMULT_IDLEPRIO;
15         return;
16     }
17 
18     p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
19     p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
20 }

  就绪队列也有一个负载权重. 每次进程被加入到就绪队列的时候,会调用 inc_nr_running,这样可以确保就绪队列跟踪记录有多少进程在运行,而且还将进程的权重添加到就绪队列的权重里面; 从就绪队列移除进程时也会调用对应的函数

 1 /*
 2  * Update delta_exec, delta_fair fields for rq.
 3  *
 4  * delta_fair clock advances at a rate inversely proportional to
 5  * total load (rq->load.weight) on the runqueue, while
 6  * delta_exec advances at the same rate as wall-clock (provided
 7  * cpu is not idle).
 8  *
 9  * delta_exec / delta_fair is a measure of the (smoothened) load on this
10  * runqueue over any given interval. This (smoothened) load is used
11  * during load balance.
12  *
13  * This function is called /before/ updating rq->load
14  * and when switching tasks.
15  */
16 static inline void inc_load(struct rq *rq, const struct task_struct *p)
17 {
18     update_load_add(&rq->load, p->se.load.weight);
19 }
20 
21 static inline void dec_load(struct rq *rq, const struct task_struct *p)
22 {
23     update_load_sub(&rq->load, p->se.load.weight);
24 }
25 
26 static void inc_nr_running(struct task_struct *p, struct rq *rq)
27 {
28     rq->nr_running++;
29     inc_load(rq, p);
30 }