|  | @@ -611,3 +611,196 @@ struct cftype blkcg_files[] = {
 | 
	
		
			
				|  |  |   * This is the blkcg counterpart of ioc_release_fn().
 | 
	
		
			
				|  |  |   */
 | 
	
		
			
				|  |  |  static void blkcg_css_offline(struct cgroup *cgroup)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	spin_lock_irq(&blkcg->lock);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	while (!hlist_empty(&blkcg->blkg_list)) {
 | 
	
		
			
				|  |  | +		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
 | 
	
		
			
				|  |  | +						struct blkcg_gq, blkcg_node);
 | 
	
		
			
				|  |  | +		struct request_queue *q = blkg->q;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +		if (spin_trylock(q->queue_lock)) {
 | 
	
		
			
				|  |  | +			blkg_destroy(blkg);
 | 
	
		
			
				|  |  | +			spin_unlock(q->queue_lock);
 | 
	
		
			
				|  |  | +		} else {
 | 
	
		
			
				|  |  | +			spin_unlock_irq(&blkcg->lock);
 | 
	
		
			
				|  |  | +			cpu_relax();
 | 
	
		
			
				|  |  | +			spin_lock_irq(&blkcg->lock);
 | 
	
		
			
				|  |  | +		}
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	spin_unlock_irq(&blkcg->lock);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static void blkcg_css_free(struct cgroup *cgroup)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (blkcg != &blkcg_root)
 | 
	
		
			
				|  |  | +		kfree(blkcg);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	static atomic64_t id_seq = ATOMIC64_INIT(0);
 | 
	
		
			
				|  |  | +	struct blkcg *blkcg;
 | 
	
		
			
				|  |  | +	struct cgroup *parent = cgroup->parent;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (!parent) {
 | 
	
		
			
				|  |  | +		blkcg = &blkcg_root;
 | 
	
		
			
				|  |  | +		goto done;
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
 | 
	
		
			
				|  |  | +	if (!blkcg)
 | 
	
		
			
				|  |  | +		return ERR_PTR(-ENOMEM);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
 | 
	
		
			
				|  |  | +	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
 | 
	
		
			
				|  |  | +done:
 | 
	
		
			
				|  |  | +	spin_lock_init(&blkcg->lock);
 | 
	
		
			
				|  |  | +	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
 | 
	
		
			
				|  |  | +	INIT_HLIST_HEAD(&blkcg->blkg_list);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	return &blkcg->css;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				/**
				 * blkcg_init_queue - initialize blkcg part of request queue
				 * @q: request_queue to initialize
				 *
				 * Sets up the blkcg side of the new request_queue @q by initializing
				 * block throttling for it.  Called from blk_alloc_queue_node().  May
				 * sleep.
				 *
				 * RETURNS:
				 * 0 on success, -errno on failure.
				 */
				int blkcg_init_queue(struct request_queue *q)
				{
					int rc;

					might_sleep();

					rc = blk_throtl_init(q);
					return rc;
				}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + * blkcg_drain_queue - drain blkcg part of request_queue
 | 
	
		
			
				|  |  | + * @q: request_queue to drain
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Called from blk_drain_queue().  Responsible for draining blkcg part.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +void blkcg_drain_queue(struct request_queue *q)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	lockdep_assert_held(q->queue_lock);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	blk_throtl_drain(q);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + * blkcg_exit_queue - exit and release blkcg part of request_queue
 | 
	
		
			
				|  |  | + * @q: request_queue being released
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Called from blk_release_queue().  Responsible for exiting blkcg part.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +void blkcg_exit_queue(struct request_queue *q)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	spin_lock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +	blkg_destroy_all(q);
 | 
	
		
			
				|  |  | +	spin_unlock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	blk_throtl_exit(q);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/*
 | 
	
		
			
				|  |  | + * We cannot support shared io contexts, as we have no mean to support
 | 
	
		
			
				|  |  | + * two tasks with the same ioc in two different groups without major rework
 | 
	
		
			
				|  |  | + * of the main cic data structures.  For now we allow a task to change
 | 
	
		
			
				|  |  | + * its cgroup only if it's the only owner of its ioc.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct task_struct *task;
 | 
	
		
			
				|  |  | +	struct io_context *ioc;
 | 
	
		
			
				|  |  | +	int ret = 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/* task_lock() is needed to avoid races with exit_io_context() */
 | 
	
		
			
				|  |  | +	cgroup_taskset_for_each(task, cgrp, tset) {
 | 
	
		
			
				|  |  | +		task_lock(task);
 | 
	
		
			
				|  |  | +		ioc = task->io_context;
 | 
	
		
			
				|  |  | +		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
 | 
	
		
			
				|  |  | +			ret = -EINVAL;
 | 
	
		
			
				|  |  | +		task_unlock(task);
 | 
	
		
			
				|  |  | +		if (ret)
 | 
	
		
			
				|  |  | +			break;
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +	return ret;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +struct cgroup_subsys blkio_subsys = {
 | 
	
		
			
				|  |  | +	.name = "blkio",
 | 
	
		
			
				|  |  | +	.css_alloc = blkcg_css_alloc,
 | 
	
		
			
				|  |  | +	.css_offline = blkcg_css_offline,
 | 
	
		
			
				|  |  | +	.css_free = blkcg_css_free,
 | 
	
		
			
				|  |  | +	.can_attach = blkcg_can_attach,
 | 
	
		
			
				|  |  | +	.subsys_id = blkio_subsys_id,
 | 
	
		
			
				|  |  | +	.base_cftypes = blkcg_files,
 | 
	
		
			
				|  |  | +	.module = THIS_MODULE,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/*
 | 
	
		
			
				|  |  | +	 * blkio subsystem is utterly broken in terms of hierarchy support.
 | 
	
		
			
				|  |  | +	 * It treats all cgroups equally regardless of where they're
 | 
	
		
			
				|  |  | +	 * located in the hierarchy - all cgroups are treated as if they're
 | 
	
		
			
				|  |  | +	 * right below the root.  Fix it and remove the following.
 | 
	
		
			
				|  |  | +	 */
 | 
	
		
			
				|  |  | +	.broken_hierarchy = true,
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +EXPORT_SYMBOL_GPL(blkio_subsys);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + * blkcg_activate_policy - activate a blkcg policy on a request_queue
 | 
	
		
			
				|  |  | + * @q: request_queue of interest
 | 
	
		
			
				|  |  | + * @pol: blkcg policy to activate
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
 | 
	
		
			
				|  |  | + * bypass mode to populate its blkgs with policy_data for @pol.
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Activation happens with @q bypassed, so nobody would be accessing blkgs
 | 
	
		
			
				|  |  | + * from IO path.  Update of each blkg is protected by both queue and blkcg
 | 
	
		
			
				|  |  | + * locks so that holding either lock and testing blkcg_policy_enabled() is
 | 
	
		
			
				|  |  | + * always enough for dereferencing policy data.
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * The caller is responsible for synchronizing [de]activations and policy
 | 
	
		
			
				|  |  | + * [un]registrations.  Returns 0 on success, -errno on failure.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +int blkcg_activate_policy(struct request_queue *q,
 | 
	
		
			
				|  |  | +			  const struct blkcg_policy *pol)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	LIST_HEAD(pds);
 | 
	
		
			
				|  |  | +	struct blkcg_gq *blkg;
 | 
	
		
			
				|  |  | +	struct blkg_policy_data *pd, *n;
 | 
	
		
			
				|  |  | +	int cnt = 0, ret;
 | 
	
		
			
				|  |  | +	bool preloaded;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (blkcg_policy_enabled(q, pol))
 | 
	
		
			
				|  |  | +		return 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/* preallocations for root blkg */
 | 
	
		
			
				|  |  | +	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
 | 
	
		
			
				|  |  | +	if (!blkg)
 | 
	
		
			
				|  |  | +		return -ENOMEM;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	preloaded = !radix_tree_preload(GFP_KERNEL);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	blk_queue_bypass_start(q);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/* make sure the root blkg exists and count the existing blkgs */
 | 
	
		
			
				|  |  | +	spin_lock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	rcu_read_lock();
 | 
	
		
			
				|  |  | +	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
 | 
	
		
			
				|  |  | +	rcu_read_unlock();
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (preloaded)
 | 
	
		
			
				|  |  | +		radix_tree_preload_end();
 | 
	
		
			
				|  |  | +
 |