linux/kernel/sched_idletask.c

/*
 * idle-task scheduling class.
 *
 * (NOTE: these are not related to SCHED_IDLE tasks which are
 *  handled in sched_fair.c)
 */

/*
 * ->check_preempt_curr() hook of the idle class.
 *
 * The idle task never wins against anything: @p is not even looked at,
 * this runqueue's idle task is handed to resched_task() unconditionally.
 */
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
{
	struct task_struct *idle = rq->idle;

	/* no priority comparison against @p - always reschedule */
	resched_task(idle);
}

/*
 * ->pick_next_task() hook of the idle class.
 *
 * Bumps the sched_goidle schedstat of @rq and returns that runqueue's
 * idle task. @now is not used.
 */
static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
{
	struct task_struct *idle;

	schedstat_inc(rq, sched_goidle);
	idle = rq->idle;

	return idle;
}

/*
 * ->dequeue_task() hook of the idle class.
 *
 * The idle task is not allowed to sleep, so reaching this function means
 * some code tried to schedule away from it. Log an error and dump the
 * call chain so the offender can be identified. @p and @sleep are unused.
 */
static void dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
{
	/*
	 * rq->lock is dropped while the report is produced
	 * (NOTE(review): presumably so printk/dump_stack do not run with
	 * the runqueue lock held - confirm against the caller) ...
	 */
	spin_unlock_irq(&rq->lock);

	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
	dump_stack();

	/* ... and taken again before returning to the caller. */
	spin_lock_irq(&rq->lock);
}

/*
 * ->put_prev_task() hook of the idle class: deliberately empty, none of
 * the arguments are used.
 */
static void
put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
{
	/* nothing to do for the idle task */
}

/*
 * ->load_balance() hook of the idle class.
 *
 * This class never moves anything: every argument is ignored, neither
 * *all_pinned nor *this_best_prio is written, and 0 is returned.
 */
static unsigned long load_balance_idle(struct rq *this_rq, int this_cpu,
		struct rq *busiest, unsigned long max_nr_move,
		unsigned long max_load_move, struct sched_domain *sd,
		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio)
{
	return 0UL;
}

/*
 * ->task_tick() hook of the idle class: deliberately empty, @rq and
 * @curr are not used.
 */
static void
task_tick_idle(struct rq *rq, struct task_struct *curr)
{
	/* nothing to do for the idle task */
}

/*
 * Scheduling class used for the per-CPU idle tasks.
 *
 * Only the hooks listed below are provided. enqueue_task, yield_task and
 * task_new are intentionally left unset for idle tasks.
 */
static struct sched_class idle_sched_class __read_mostly = {
	/* selecting the idle task and switching away from it */
	.pick_next_task = pick_next_task_idle,
	.put_prev_task = put_prev_task_idle,

	/* dequeueing the idle task is invalid - this only prints a report */
	.dequeue_task = dequeue_task_idle,

	.check_preempt_curr = check_preempt_curr_idle,

	/* stubs: nothing to balance, nothing to do on a tick */
	.load_balance = load_balance_idle,
	.task_tick = task_tick_idle,
};
sched: cfs core, kernel/sched_idletask.c add kernel/sched_idletask.c - which implements the idle thread scheduling class. This further simplifies sched.c (under CFS), for example a number of 'if (p == rq->idle)' type of special-cases can be removed from sched.c, and schedule() gets simpler too. Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-07-09 16:51:58 +00:00			`/*`
			`* idle-task scheduling class.`
			`*`
			`* (NOTE: these are not related to SCHED_IDLE tasks which are`
			`* handled in sched_fair.c)`
			`*/`

			`/*`
			`* Idle tasks are unconditionally rescheduled:`
			`*/`
			`static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)`
			`{`
			`resched_task(rq->idle);`
			`}`

			`static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)`
			`{`
			`schedstat_inc(rq, sched_goidle);`

			`return rq->idle;`
			`}`

			`/*`
			`* It is not legal to sleep in the idle task - print a warning`
			`* message if some code attempts to do it:`
			`*/`
			`static void`
sched: remove the 'u64 now' parameter from ->dequeue_task() remove the 'u64 now' parameter from ->dequeue_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-08-09 09:16:48 +00:00			`dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)`
sched: cfs core, kernel/sched_idletask.c add kernel/sched_idletask.c - which implements the idle thread scheduling class. This further simplifies sched.c (under CFS), for example a number of 'if (p == rq->idle)' type of special-cases can be removed from sched.c, and schedule() gets simpler too. Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-07-09 16:51:58 +00:00			`{`
			`spin_unlock_irq(&rq->lock);`
			`printk(KERN_ERR "bad: scheduling from the idle thread!\n");`
			`dump_stack();`
			`spin_lock_irq(&rq->lock);`
			`}`

			`static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)`
			`{`
			`}`

sched: simplify move_tasks() The move_tasks() function is currently multiplexed with two distinct capabilities: 1. attempt to move a specified amount of weighted load from one run queue to another; and 2. attempt to move a specified number of tasks from one run queue to another. The first of these capabilities is used in two places, load_balance() and load_balance_idle(), and in both of these cases the return value of move_tasks() is used purely to decide if tasks/load were moved and no notice of the actual number of tasks moved is taken. The second capability is used in exactly one place, active_load_balance(), to attempt to move exactly one task and, as before, the return value is only used as an indicator of success or failure. This multiplexing of sched_task() was introduced, by me, as part of the smpnice patches and was motivated by the fact that the alternative, one function to move specified load and one to move a single task, would have led to two functions of roughly the same complexity as the old move_tasks() (or the new balance_tasks()). However, the new modular design of the new CFS scheduler allows a simpler solution to be adopted and this patch addresses that solution by: 1. adding a new function, move_one_task(), to be used by active_load_balance(); and 2. making move_tasks() a single purpose function that tries to move a specified weighted load and returns 1 for success and 0 for failure. One of the consequences of these changes is that neither move_one_task() or the new move_tasks() care how many tasks sched_class.load_balance() moves and this enables its interface to be simplified by returning the amount of load moved as its result and removing the load_moved pointer from the argument list. This helps simplify the new move_tasks() and slightly reduces the amount of work done in each of sched_class.load_balance()'s implementations. Further simplification, e.g. 
changes to balance_tasks(), are possible but (slightly) complicated by the special needs of load_balance_fair() so I've left them to a later patch (if this one gets accepted). NB Since move_tasks() gets called with two run queue locks held even small reductions in overhead are worthwhile. [ mingo@elte.hu ] this change also reduces code size nicely: text data bss dec hex filename 39216 3618 24 42858 a76a sched.o.before 39173 3618 24 42815 a73f sched.o.after Signed-off-by: Peter Williams <pwil3058@bigpond.net.au> Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-08-09 09:16:46 +00:00			`static unsigned long`
sched: cfs core, kernel/sched_idletask.c add kernel/sched_idletask.c - which implements the idle thread scheduling class. This further simplifies sched.c (under CFS), for example a number of 'if (p == rq->idle)' type of special-cases can be removed from sched.c, and schedule() gets simpler too. Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-07-09 16:51:58 +00:00			`load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,`
			`unsigned long max_nr_move, unsigned long max_load_move,`
			`struct sched_domain *sd, enum cpu_idle_type idle,`
sched: fix bug in balance_tasks() There are two problems with balance_tasks() and how it used: 1. The variables best_prio and best_prio_seen (inherited from the old move_tasks()) were only required to handle problems caused by the active/expired arrays, the order in which they were processed and the possibility that the task with the highest priority could be on either. These issues are no longer present and the extra overhead associated with their use is unnecessary (and possibly wrong). 2. In the absence of CONFIG_FAIR_GROUP_SCHED being set, the same this_best_prio variable needs to be used by all scheduling classes or there is a risk of moving too much load. E.g. if the highest priority task on this at the beginning is a fairly low priority task and the rt class migrates a task (during its turn) then that moved task becomes the new highest priority task on this_rq but when the sched_fair class initializes its copy of this_best_prio it will get the priority of the original highest priority task as, due to the run queue locks being held, the reschedule triggered by pull_task() will not have taken place. This could result in inappropriate overriding of skip_for_load and excessive load being moved. The attached patch addresses these problems by deleting all reference to best_prio and best_prio_seen and making this_best_prio a reference parameter to the various functions involved. load_balance_fair() has also been modified so that this_best_prio is only reset (in the loop) if CONFIG_FAIR_GROUP_SCHED is set. This should preserve the effect of helping spread groups' higher priority tasks around the available CPUs while improving system performance when CONFIG_FAIR_GROUP_SCHED isn't set. Signed-off-by: Peter Williams <pwil3058@bigpond.net.au> Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-08-09 09:16:46 +00:00			`int *all_pinned, int *this_best_prio)`
sched: cfs core, kernel/sched_idletask.c add kernel/sched_idletask.c - which implements the idle thread scheduling class. This further simplifies sched.c (under CFS), for example a number of 'if (p == rq->idle)' type of special-cases can be removed from sched.c, and schedule() gets simpler too. Signed-off-by: Ingo Molnar <mingo@elte.hu> 2007-07-09 16:51:58 +00:00			`{`
			`return 0;`
			`}`

			`static void task_tick_idle(struct rq *rq, struct task_struct *curr)`
			`{`
			`}`

			`/*`
			`* Simple, special scheduling class for the per-CPU idle tasks:`
			`*/`
			`static struct sched_class idle_sched_class __read_mostly = {`
			`/* no enqueue/yield_task for idle tasks */`

			`/* dequeue is not valid, we print a debug message there: */`
			`.dequeue_task = dequeue_task_idle,`

			`.check_preempt_curr = check_preempt_curr_idle,`

			`.pick_next_task = pick_next_task_idle,`
			`.put_prev_task = put_prev_task_idle,`

			`.load_balance = load_balance_idle,`

			`.task_tick = task_tick_idle,`
			`/* no .task_new for idle tasks */`
			`};`