struct sched_entity {
	/* For load-balancing: */
	struct load_weight		load;
	unsigned long			runnable_weight;
	struct rb_node			run_node;
	struct list_head		group_node;
	unsigned int			on_rq;

	u64				exec_start;
	u64				sum_exec_runtime;
	u64				vruntime;
	u64				prev_sum_exec_runtime;

	u64				nr_migrations;

	struct sched_statistics		statistics;

#ifdef CONFIG_FAIR_GROUP_SCHED
	int				depth;
	struct sched_entity		*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq			*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq			*my_q;
#endif

#ifdef CONFIG_SMP
	/*
	 * Per entity load average tracking.
	 *
	 * Put into separate cache line so it does not
	 * collide with read-mostly values above.
	 */
	struct sched_avg		avg;
#endif
};
At scheduling time, the scheduling entities are first separated into real-time tasks and normal tasks. The normal ones are then organized into a red-black tree ordered by virtual runtime: the entity with the smallest vruntime sits at the leftmost end of the tree and the one with the largest vruntime at the rightmost. Under the CFS policy, the leftmost node of this red-black tree is chosen as the next task to get the CPU. This red-black tree lives in the CFS run queue (struct cfs_rq), which is in turn embedded in the per-CPU run queue, struct rq.
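To make the ordering concrete, here is a minimal user-space sketch (not kernel code) that models how CFS always runs the entity with the smallest vruntime and how vruntime advances inversely to weight. The task names, the tick loop and the plain array standing in for the red-black tree are all illustrative assumptions.

#include <stdio.h>

/* Toy model: an array stands in for the rb-tree keyed by vruntime. */
struct toy_entity {
	const char *name;
	unsigned long weight;            /* load weight, higher = bigger CPU share */
	unsigned long long vruntime;
};

#define NICE_0_WEIGHT 1024UL             /* weight of a nice-0 task, as in the kernel */

/* "Leftmost node": the entity with the smallest vruntime. */
static struct toy_entity *pick_leftmost(struct toy_entity *e, int n)
{
	struct toy_entity *best = &e[0];
	for (int i = 1; i < n; i++)
		if (e[i].vruntime < best->vruntime)
			best = &e[i];
	return best;
}

int main(void)
{
	struct toy_entity tasks[] = {
		{ "A", 1024, 0 },        /* nice 0 */
		{ "B", 2048, 0 },        /* higher weight -> vruntime grows slower */
	};

	for (int tick = 0; tick < 6; tick++) {
		struct toy_entity *cur = pick_leftmost(tasks, 2);
		/* vruntime advances by delta * NICE_0_WEIGHT / weight */
		cur->vruntime += 10 * NICE_0_WEIGHT / cur->weight;
		printf("tick %d runs %s (vruntime now %llu)\n",
		       tick, cur->name, cur->vruntime);
	}
	return 0;
}

Running it shows the heavier task B being picked twice as often as A, which is exactly the fairness the vruntime ordering is meant to provide.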
/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t		lock;

	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	unsigned int		nr_running;
	......
#define CPU_LOAD_IDX_MAX 5
	unsigned long		cpu_load[CPU_LOAD_IDX_MAX];
	......
	/* capture load from *all* tasks on this CPU: */
	struct load_weight	load;
	unsigned long		nr_load_updates;
	u64			nr_switches;

	struct cfs_rq		cfs;
	struct rt_rq		rt;
	struct dl_rq		dl;
	......
	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long		nr_uninterruptible;

	struct task_struct	*curr;
	struct task_struct	*idle;
	struct task_struct	*stop;
	unsigned long		next_balance;
	struct mm_struct	*prev_mm;

	unsigned int		clock_update_flags;
	u64			clock;
	/* Ensure that all clocks are in the same cache line */
	u64			clock_task ____cacheline_aligned;
	u64			clock_pelt;
	unsigned long		lost_idle_time;

	atomic_t		nr_iowait;
	......
	/* calc_load related fields */
	unsigned long		calc_load_update;
	long			calc_load_active;
	......
};
/* CFS-related fields in a runqueue */
struct cfs_rq {
	struct load_weight	load;
	unsigned long		runnable_weight;
	unsigned int		nr_running;
	unsigned int		h_nr_running;

	u64			exec_clock;
	u64			min_vruntime;
#ifndef CONFIG_64BIT
	u64			min_vruntime_copy;
#endif

	struct rb_root_cached	tasks_timeline;

	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e when none are currently running).
	 */
	struct sched_entity	*curr;
	struct sched_entity	*next;
	struct sched_entity	*last;
	struct sched_entity	*skip;
	......
};
The structure dl_rq is defined in a similar way: its run queue is also built on a red-black tree, but the tree is ordered and managed by deadline (earliest deadline first). A small sketch of the earliest-deadline-first pick follows the struct below.
/* Deadline class' related fields in a runqueue */
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root_cached	root;

	unsigned long		dl_nr_running;

#ifdef CONFIG_SMP
	/*
	 * Deadline values of the currently executing and the
	 * earliest ready task on this rq. Caching these facilitates
	 * the decision whether or not a ready but not running task
	 * should migrate somewhere else.
	 */
	struct {
		u64		curr;
		u64		next;
	} earliest_dl;

	unsigned long		dl_nr_migratory;
	int			overloaded;

	/*
	 * Tasks on this rq that can be pushed away. They are kept in
	 * an rb-tree, ordered by tasks' deadlines, with caching
	 * of the leftmost (earliest deadline) element.
	 */
	struct rb_root_cached	pushable_dl_tasks_root;
#else
	struct dl_bw		dl_bw;
#endif
	/*
	 * "Active utilization" for this runqueue: increased when a
	 * task wakes up (becomes TASK_RUNNING) and decreased when a
	 * task blocks
	 */
	u64			running_bw;

	/*
	 * Utilization of the tasks "assigned" to this runqueue (including
	 * the tasks that are in runqueue and the tasks that executed on this
	 * CPU and blocked). Increased when a task moves to this runqueue, and
	 * decreased when the task moves away (migrates, changes scheduling
	 * policy, or terminates).
	 * This is needed to compute the "inactive utilization" for the
	 * runqueue (inactive utilization = this_bw - running_bw).
	 */
	u64			this_bw;
	u64			extra_bw;

	/*
	 * Inverse of the fraction of CPU utilization that can be reclaimed
	 * by the GRUB algorithm.
	 */
	u64			bw_ratio;
};
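The essence of the deadline ordering is easy to show outside the kernel. Here is a minimal user-space sketch of the earliest-deadline-first pick that dl_rq implements with its cached-leftmost rb-tree; the array, task names and deadline values are illustrative assumptions, not kernel code.

#include <stdio.h>

/* Toy deadline entity: absolute deadline in nanoseconds. */
struct toy_dl_task {
	const char *name;
	unsigned long long deadline;
};

/* EDF: the runnable task with the earliest absolute deadline runs next.
 * In the kernel this is the cached leftmost node of dl_rq->root. */
static struct toy_dl_task *pick_earliest(struct toy_dl_task *t, int n)
{
	struct toy_dl_task *best = &t[0];
	for (int i = 1; i < n; i++)
		if (t[i].deadline < best->deadline)
			best = &t[i];
	return best;
}

int main(void)
{
	struct toy_dl_task tasks[] = {
		{ "video",  2000000 },
		{ "audio",  1000000 },   /* earliest deadline -> picked first */
		{ "logger", 5000000 },
	};

	printf("next: %s\n", pick_earliest(tasks, 3)->name);
	return 0;
}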
The real-time run queue rt_rq, on the other hand, is different: it is not implemented with a red-black tree, but with a priority array of per-priority task lists (a sketch of the priority-array pick follows the struct below).
/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
	struct rt_prio_array	active;
	unsigned int		rt_nr_running;
	unsigned int		rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int		curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
		int		next; /* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long		rt_nr_migratory;
	unsigned long		rt_nr_total;
	int			overloaded;
	struct plist_head	pushable_tasks;
#endif /* CONFIG_SMP */
	int			rt_queued;

	int			rt_throttled;
	u64			rt_time;
	u64			rt_runtime;
	/* Nests inside the rq lock: */
	raw_spinlock_t		rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long		rt_nr_boosted;

	struct rq		*rq;
	struct task_group	*tg;
#endif
};
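The rt_prio_array stored in 'active' is essentially a bitmap of non-empty priority levels plus one FIFO list per priority, so picking the next real-time task boils down to finding the first set bit. Below is a minimal user-space sketch of that idea; the 100-level assumption mirrors the kernel's MAX_RT_PRIO, but the counter array standing in for the per-priority task lists is an illustrative simplification.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define TOY_MAX_RT_PRIO 100   /* the kernel's MAX_RT_PRIO is 100 */

/* Toy priority array: a bitmap of non-empty priorities plus a per-priority
 * counter; the kernel keeps a struct list_head of tasks per priority instead. */
struct toy_rt_prio_array {
	uint64_t bitmap[(TOY_MAX_RT_PRIO + 63) / 64];
	int nr_queued[TOY_MAX_RT_PRIO];
};

static void toy_enqueue(struct toy_rt_prio_array *a, int prio)
{
	a->nr_queued[prio]++;
	a->bitmap[prio / 64] |= (uint64_t)1 << (prio % 64);
}

/* Lower number = higher RT priority: the pick is "find first set bit". */
static int toy_pick_highest(struct toy_rt_prio_array *a)
{
	for (int prio = 0; prio < TOY_MAX_RT_PRIO; prio++)
		if (a->bitmap[prio / 64] & ((uint64_t)1 << (prio % 64)))
			return prio;
	return -1; /* nothing queued */
}

int main(void)
{
	struct toy_rt_prio_array a;

	memset(&a, 0, sizeof(a));
	toy_enqueue(&a, 40);
	toy_enqueue(&a, 10);   /* higher priority, picked first */
	printf("next RT priority to run: %d\n", toy_pick_highest(&a));
	return 0;
}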
Next, let's look at the scheduling class, sched_class. It defines the run-queue operations as a set of function pointers, for example (a small sketch of this function-pointer pattern follows the struct below):
enqueue_task: add a task to the run queue; called when a task becomes runnable;
dequeue_task: remove a task from the run queue;
yield_task: voluntarily give up the CPU;
yield_to_task: give up the CPU and yield to a specified task_struct;
check_preempt_curr: check whether the current task should be preempted;
pick_next_task: select the next task to run;
put_prev_task: put the currently running task back before another task takes its place;
set_curr_task: called when the current task's scheduling policy changes;
task_tick: called on every periodic scheduler tick; may trigger a reschedule;
task_dead: called when a task exits;
switched_from / switched_to: used when a task changes to a different scheduling class;
prio_changed: called when a task's priority changes.
struct sched_class {
	const struct sched_class *next;

	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
	void (*yield_task)   (struct rq *rq);
	bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);

	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);

	/*
	 * It is the responsibility of the pick_next_task() method that will
	 * return the next task to call put_prev_task() on the @prev task or
	 * something equivalent.
	 *
	 * May return RETRY_TASK when it finds a higher prio class has runnable
	 * tasks.
	 */
	struct task_struct * (*pick_next_task)(struct rq *rq,
					       struct task_struct *prev,
					       struct rq_flags *rf);
	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
	......
	void (*set_curr_task)(struct rq *rq);
	void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
	void (*task_fork)(struct task_struct *p);
	void (*task_dead)(struct task_struct *p);

	/*
	 * The switched_from() call is allowed to drop rq->lock, therefore we
	 * cannot assume the switched_from/switched_to pair is serialized by
	 * rq->lock. They are however serialized by p->pi_lock.
	 */
	void (*switched_from)(struct rq *this_rq, struct task_struct *task);
	void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
			      int oldprio);

	unsigned int (*get_rr_interval)(struct rq *rq,
					struct task_struct *task);

	void (*update_curr)(struct rq *rq);

#define TASK_SET_GROUP		0
#define TASK_MOVE_GROUP		1
	......
};
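The pattern here is a plain-C "virtual method table": every scheduling class fills in the same set of function pointers, and the core scheduler only ever calls through them, never caring which class it is talking to. A minimal user-space sketch of that pattern with made-up types and just two operations (all names here are illustrative, not the kernel's):

#include <stdio.h>

struct toy_task { const char *name; };
struct toy_rq   { struct toy_task *queued; };

/* A stripped-down "sched_class": a table of operations. */
struct toy_sched_class {
	const char *name;
	void (*enqueue_task)(struct toy_rq *rq, struct toy_task *p);
	struct toy_task *(*pick_next_task)(struct toy_rq *rq);
};

static void fair_enqueue(struct toy_rq *rq, struct toy_task *p) { rq->queued = p; }
static struct toy_task *fair_pick(struct toy_rq *rq)            { return rq->queued; }

static const struct toy_sched_class toy_fair_class = {
	.name           = "fair",
	.enqueue_task   = fair_enqueue,
	.pick_next_task = fair_pick,
};

int main(void)
{
	struct toy_rq rq = { 0 };
	struct toy_task t = { "bash" };

	/* The "core scheduler" only uses the function pointers. */
	toy_fair_class.enqueue_task(&rq, &t);
	printf("%s class picked %s\n", toy_fair_class.name,
	       toy_fair_class.pick_next_task(&rq)->name);
	return 0;
}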
/*
 * __schedule() is the main scheduler function.
 *
 * The main means of driving the scheduler and thus entering this function are:
 *
 * 1. Explicit blocking: mutex, semaphore, waitqueue, etc.
 *
 * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
 *    paths. For example, see arch/x86/entry_64.S.
 *
 *    To drive preemption between tasks, the scheduler sets the flag in timer
 *    interrupt handler scheduler_tick().
 *
 * 3. Wakeups don't really cause entry into schedule(). They add a
 *    task to the run-queue and that's it.
 *
 *    Now, if the new task added to the run-queue preempts the current
 *    task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
 *    called on the nearest possible occasion:
 *
 *    - If the kernel is preemptible (CONFIG_PREEMPT=y):
 *      - in syscall or exception context, at the next outmost
 *        preempt_enable(). (this might be as soon as the wake_up()'s
 *        spin_unlock()!)
 *      - in IRQ context, return from interrupt-handler to
 *        preemptible context
 *
 *    - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
 *      then at the next:
 *      - cond_resched() call
 *      - explicit schedule() call
 *      - return from syscall or exception to user-space
 *      - return from interrupt-handler to user-space
 *
 * WARNING: must be called with preemption disabled!
 */
static void __sched notrace __schedule(bool preempt)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq_flags rf;
	struct rq *rq;
	int cpu;

	/* Take the run queue of the current CPU; prev is the current task. */
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	prev = rq->curr;

	/* Sanity check: make sure it is valid to schedule here. */
	schedule_debug(prev);

	if (sched_feat(HRTICK))
		hrtick_clear(rq);

	/* Disable local interrupts, notify RCU of the context switch, lock the rq. */
	local_irq_disable();
	rcu_note_context_switch(preempt);

	/*
	 * Make sure that signal_pending_state()->signal_pending() below
	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
	 * done by the caller to avoid the race with signal_wake_up().
	 *
	 * The membarrier system call requires a full memory barrier
	 * after coming from user-space, before storing to rq->curr.
	 */
	rq_lock(rq, &rf);
	smp_mb__after_spinlock();

	/* Promote REQ to ACT */
	rq->clock_update_flags <<= 1;
	update_rq_clock(rq);

	switch_count = &prev->nivcsw;
	if (!preempt && prev->state) {
		/* A sleeping task with a pending signal is put back to TASK_RUNNING. */
		if (signal_pending_state(prev->state, prev)) {
			prev->state = TASK_RUNNING;
		} else {
			/* Otherwise dequeue the current task from rq and clear on_rq;
			 * account for outstanding I/O wait if there is any. */
			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
			prev->on_rq = 0;

			if (prev->in_iowait) {
				atomic_inc(&rq->nr_iowait);
				delayacct_blkio_start();
			}

			/*
			 * If a worker went to sleep, notify and ask workqueue
			 * whether it wants to wake up a task to maintain
			 * concurrency.
			 */
			if (prev->flags & PF_WQ_WORKER) {
				struct task_struct *to_wakeup;

				to_wakeup = wq_worker_sleeping(prev);
				if (to_wakeup)
					try_to_wake_up_local(to_wakeup, &rf);
			}
		}
		switch_count = &prev->nvcsw;
	}

	/* Ask the scheduling classes for the next task to run. */
	next = pick_next_task(rq, prev, &rf);
	clear_tsk_need_resched(prev);
	clear_preempt_need_resched();

	/* If a different task was picked, switch the context. */
	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;
		/*
		 * The membarrier system call requires each architecture
		 * to have a full memory barrier after updating
		 * rq->curr, before returning to user-space.
		 *
		 * Here are the schemes providing that barrier on the
		 * various architectures:
		 *
		 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
		 *   switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
		 * - finish_lock_switch() for weakly-ordered
		 *   architectures where spin_unlock is a full barrier,
		 * - switch_to() for arm64 (weakly-ordered, spin_unlock
		 *   is a RELEASE barrier),
		 */
		++*switch_count;

		trace_sched_switch(preempt, prev, next);

		/* Also unlocks the rq: */
		rq = context_switch(rq, prev, next, &rf);
	} else {
		/* No switch: clear the clock-skip flags and re-enable interrupts. */
		rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
		rq_unlock_irq(rq, &rf);
	}

	/* Run any balance callbacks queued on this rq. */
	balance_callback(rq);
}
/*
 * Pick up the highest-prio task:
 */
static inline struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;

	/*
	 * Optimization: we know that if all tasks are in the fair class we can
	 * call that function directly, but only if the @prev task wasn't of a
	 * higher scheduling class, because otherwise those loose the
	 * opportunity to pull in more work from other CPUs.
	 *
	 * (So for the common case of plain CFS tasks,
	 *  fair_sched_class.pick_next_task() is called directly.)
	 */
	if (likely((prev->sched_class == &idle_sched_class ||
		    prev->sched_class == &fair_sched_class) &&
		   rq->nr_running == rq->cfs.h_nr_running)) {

		p = fair_sched_class.pick_next_task(rq, prev, rf);
		if (unlikely(p == RETRY_TASK))
			goto again;

		/* Assumes fair_sched_class->next == idle_sched_class */
		if (unlikely(!p))
			p = idle_sched_class.pick_next_task(rq, prev, rf);

		return p;
	}

again:
	/* Otherwise walk the classes in priority order and return the first pick. */
	for_each_class(class) {
		p = class->pick_next_task(rq, prev, rf);
		if (p) {
			if (unlikely(p == RETRY_TASK))
				goto again;
			return p;
		}
	}

	/* The idle class should always have a runnable task: */
	BUG();
}
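for_each_class walks the classes from highest to lowest priority (stop, deadline, real-time, fair, idle), so a lower class only gets to pick a task when every higher class has nothing runnable. Here is a user-space sketch of that loop; the class set is trimmed down and all names and pick results are illustrative assumptions.

#include <stdio.h>
#include <stddef.h>

/* Toy scheduling class: returns a task name, or NULL if nothing is runnable. */
struct toy_class {
	const char *name;
	const char *(*pick_next)(void);
};

static const char *rt_pick(void)   { return NULL; }       /* no RT task queued   */
static const char *fair_pick(void) { return "bash"; }     /* one CFS task queued */
static const char *idle_pick(void) { return "swapper"; }  /* idle always has one */

/* Highest priority first, mirroring stop -> dl -> rt -> fair -> idle. */
static const struct toy_class classes[] = {
	{ "rt",   rt_pick   },
	{ "fair", fair_pick },
	{ "idle", idle_pick },
};

int main(void)
{
	/* The analogue of for_each_class(): the first class with a pick wins. */
	for (size_t i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		const char *p = classes[i].pick_next();
		if (p) {
			printf("class %s picked %s\n", classes[i].name, p);
			break;
		}
	}
	return 0;
}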
Now let's look at the context switch itself. A context switch does two main things: first, it switches the task's address space, i.e. the virtual memory; second, it switches the registers and CPU context. Address-space switching is covered in detail in the memory-management articles, so we set it aside here; it is the address-space switch that effectively completes the user-space side of the context switch. Below we focus on the kernel-side switch, i.e. switching the registers and CPU context.
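Before diving into the kernel code, an intuition for "saving and restoring register state and the stack" can be had entirely in user space with the POSIX ucontext API. The sketch below is only an analogy for switch_to(), assuming a single extra stack and a trivial worker function; it is not how the kernel switches tasks.

#include <stdio.h>
#include <ucontext.h>

static ucontext_t main_ctx, task_ctx;
static char task_stack[64 * 1024];   /* stack for the "other task" */

static void task_body(void)
{
	printf("task: running on its own stack\n");
	/* Save this task's registers and resume main: the analogue of switch_to(). */
	swapcontext(&task_ctx, &main_ctx);
	printf("task: resumed, registers and stack restored\n");
}

int main(void)
{
	/* Capture current register state into task_ctx, then point it at task_body. */
	getcontext(&task_ctx);
	task_ctx.uc_stack.ss_sp = task_stack;
	task_ctx.uc_stack.ss_size = sizeof(task_stack);
	task_ctx.uc_link = &main_ctx;           /* resume main when task_body returns */
	makecontext(&task_ctx, task_body, 0);

	printf("main: switching to task\n");
	swapcontext(&main_ctx, &task_ctx);      /* save main's state, load the task's */
	printf("main: back, switching to task again\n");
	swapcontext(&main_ctx, &task_ctx);      /* resume the task where it left off  */
	printf("main: done\n");
	return 0;
}

Just like the kernel, the trick is that each swapcontext() stores the current registers and stack pointer into one context object and loads them from another, so each side resumes exactly where it stopped.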
/*
 * context_switch - switch to the new MM and the new thread's register state.
 */
static __always_inline struct rq *
context_switch(struct rq *rq, struct task_struct *prev,
	       struct task_struct *next, struct rq_flags *rf)
{
	struct mm_struct *mm, *oldmm;

	prepare_task_switch(rq, prev, next);

	mm = next->mm;
	oldmm = prev->active_mm;
	/*
	 * For paravirt, this is coupled with an exit in switch_to to
	 * combine the page table reload and the switch backend into
	 * one hypercall.
	 */
	arch_start_context_switch(prev);

	/*
	 * If mm is non-NULL, we pass through switch_mm(). If mm is
	 * NULL, we will pass through mmdrop() in finish_task_switch().
	 * Both of these contain the full memory barrier required by
	 * membarrier after storing to rq->curr, before returning to
	 * user-space.
	 */
	if (!mm) {
		next->active_mm = oldmm;
		mmgrab(oldmm);
		enter_lazy_tlb(oldmm, next);
	} else
		switch_mm_irqs_off(oldmm, mm, next);

	if (!prev->mm) {
		prev->active_mm = NULL;
		rq->prev_mm = oldmm;
	}

	rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);

	prepare_lock_switch(rq, next, rf);

	/* Here we just switch the register state and the stack. */
	switch_to(prev, next, prev);
	/*
	 * barrier() is a compiler barrier: it prevents the compiler from
	 * reordering switch_to() and finish_task_switch() during optimization.
	 */
	barrier();

	return finish_task_switch(prev);
}
Suppose three tasks A, B and C switch in turn:
1. A saves its kernel stack and registers and switches to B. At this point prev = A and next = B; this state is saved on A's stack and will be restored the next time A is scheduled in. B then runs finish_task_switch() and continues, returning B's run queue rq.
2. B saves its kernel stack and registers and switches to C.
3. C saves its kernel stack and registers and switches to A. A resumes right after barrier(), but the prev = A, next = B it saved back in step 1 knows nothing about C; the information about C would be lost. This is where the last pointer becomes important: after __switch_to_asm completes, A's restored kernel stack and registers bring back the old prev and next, but the return value carries C's task_struct address, which is stored in last so that finish_task_switch() can do the cleanup for C.
Eventually __switch_to() is called. This function involves the TSS (Task State Segment), a structure that can hold all of a task's registers, and the special TR (Task Register) that points to a TSS. With x86 hardware task switching, changing TR makes the CPU save all of its registers into the current TSS and load the registers from the new TSS, so every rewrite of TR is a task switch driven by the hardware. Linux does not use this mechanism: at system initialization cpu_init() associates one TSS with each CPU and points TR at it, and from then on TR is never switched again; it always points to that same per-CPU TSS, and the register state is instead saved and restored in software.
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	switch_fpu_prepare(prev_fpu, cpu);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry. No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel. Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs. This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	lazy_save_gs(prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed. In normal use, the flags restore
	 * in the switch assembly will handle this. But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	switch_to_extra(prev_p, next_p);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before fpu__restore(), so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Reload esp0 and cpu_current_top_of_stack. This changes
	 * current_thread_info(). Refresh the SYSENTER configuration in
	 * case prev or next is vm86.
	 */
	update_task_stack(next_p);
	refresh_sysenter_cs(next);
	this_cpu_write(cpu_current_top_of_stack,
		       (unsigned long)task_stack_page(next_p) +
		       THREAD_SIZE);

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	switch_fpu_finish(next_fpu, cpu);

	this_cpu_write(current_task, next_p);

	/* Load the Intel cache allocation PQR MSR. */
	resctrl_sched_in();

	return prev_p;
}
/**
 * finish_task_switch - clean up after a task-switch
 * @prev: the thread we just switched away from.
 *
 * finish_task_switch must be called after the context switch, paired
 * with a prepare_task_switch call before the context switch.
 * finish_task_switch will reconcile locking set up by prepare_task_switch,
 * and do any other architecture-specific cleanup actions.
 *
 * Note that we may have delayed dropping an mm in context_switch(). If
 * so, we finish that here outside of the runqueue lock. (Doing it
 * with the lock held can cause deadlocks; see schedule() for
 * details.)
 *
 * The context switch have flipped the stack from under us and restored the
 * local variables which were saved when this task called schedule() in the
 * past. prev == current is still correct but we need to recalculate this_rq
 * because prev may have moved to another CPU.
 */
static struct rq *finish_task_switch(struct task_struct *prev)
	__releases(rq->lock)
{
	struct rq *rq = this_rq();
	struct mm_struct *mm = rq->prev_mm;
	long prev_state;

	/*
	 * The previous task will have left us with a preempt_count of 2
	 * because it left us after:
	 *
	 *	schedule()
	 *	  preempt_disable();			// 1
	 *	  __schedule()
	 *	    raw_spin_lock_irq(&rq->lock)	// 2
	 *
	 * Also, see FORK_PREEMPT_COUNT.
	 */
	if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
		      "corrupted preempt_count: %s/%d/0x%x\n",
		      current->comm, current->pid, preempt_count()))
		preempt_count_set(FORK_PREEMPT_COUNT);

	rq->prev_mm = NULL;

	/*
	 * A task struct has one reference for the use as "current".
	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
	 * schedule one last time. The schedule call will never return, and
	 * the scheduled task must drop that reference.
	 *
	 * We must observe prev->state before clearing prev->on_cpu (in
	 * finish_task), otherwise a concurrent wakeup can get prev
	 * running on another CPU and we could race with its RUNNING -> DEAD
	 * transition, resulting in a double drop.
	 */
	prev_state = prev->state;
	vtime_task_switch(prev);
	perf_event_task_sched_in(prev, current);
	finish_task(prev);
	finish_lock_switch(rq);
	finish_arch_post_lock_switch();
	kcov_finish_switch(current);

	fire_sched_in_preempt_notifiers(current);
	/*
	 * When switching through a kernel thread, the loop in
	 * membarrier_{private,global}_expedited() may have observed that
	 * kernel thread and not issued an IPI. It is therefore possible to
	 * schedule between user->kernel->user threads without passing though
	 * switch_mm(). Membarrier requires a barrier after storing to
	 * rq->curr, before returning to userspace, so provide them here:
	 *
	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
	 *   provided by mmdrop(),
	 * - a sync_core for SYNC_CORE.
	 */
	if (mm) {
		membarrier_mm_sync_core_before_usermode(mm);
		mmdrop(mm);
	}
	if (unlikely(prev_state == TASK_DEAD)) {
		if (prev->sched_class->task_dead)
			prev->sched_class->task_dead(prev);

		/*
		 * Remove function-return probe instances associated with this
		 * task and put them back on the free list.
		 */
		kprobe_flush_task(prev);

		/* Task is done with its stack. */
		put_task_stack(prev);

		put_task_struct(prev);
	}

	tick_nohz_task_switch();
	return rq;
}
/*
* scheduler tick hitting a task of our scheduling class.
* NOTE: This function can be called remotely by the tick offload that
* goes along full dynticks. Therefore no local assumption can be made
* and everything must be accessed through the @rq and @curr passed in
* parameters.
*/
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &curr->se;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		entity_tick(cfs_rq, se, queued);
	}

	if (static_branch_unlikely(&sched_numa_balancing))
		task_tick_numa(rq, curr);

	update_misfit_status(curr, rq);
	update_overutilized_status(task_rq(curr));
}
/*
* Preempt the current task with a newly woken task if needed:
*/
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
	unsigned long ideal_runtime, delta_exec;
	struct sched_entity *se;
	s64 delta;

	ideal_runtime = sched_slice(cfs_rq, curr);
	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
	if (delta_exec > ideal_runtime) {
		resched_curr(rq_of(cfs_rq));
		/*
		 * The current task ran long enough, ensure it doesn't get
		 * re-elected due to buddy favours.
		 */
		clear_buddies(cfs_rq, curr);
		return;
	}

	/*
	 * Ensure that a task that missed wakeup preemption by a
	 * narrow margin doesn't have to wait for a full slice.
	 * This also mitigates buddy induced latencies under load.
	 */
	if (delta_exec < sysctl_sched_min_granularity)
		return;

	se = __pick_first_entity(cfs_rq);
	delta = curr->vruntime - se->vruntime;

	if (delta < 0)
		return;

	if (delta > ideal_runtime)
		resched_curr(rq_of(cfs_rq));
}
/*
* resched_curr - mark rq's current task 'to be rescheduled now'.
*
* On UP this means the setting of the need_resched flag, on SMP it
* might also involve a cross-CPU call to trigger the scheduler on
* the target CPU.
*/
void resched_curr(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	int cpu;
	......
	cpu = cpu_of(rq);

	if (cpu == smp_processor_id()) {
		set_tsk_need_resched(curr);
		set_preempt_need_resched();
		return;
	}

	if (set_nr_and_not_polling(curr))
		smp_send_reschedule(cpu);
	else
		trace_sched_wake_idle_without_ipi(cpu);
}
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
	while (true) {
		/* We have work to do. */
		local_irq_enable();

		if (cached_flags & _TIF_NEED_RESCHED)
			schedule();
		......
	}