@@ -611,3 +611,196 @@ struct cftype blkcg_files[] = {
  * This is the blkcg counterpart of ioc_release_fn().
  */
 static void blkcg_css_offline(struct cgroup *cgroup)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+
+	spin_lock_irq(&blkcg->lock);
+
+	while (!hlist_empty(&blkcg->blkg_list)) {
+		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
+						    struct blkcg_gq, blkcg_node);
+		struct request_queue *q = blkg->q;
+
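+		/*
+		 * blkcg->lock nests inside q->queue_lock, so the queue lock
+		 * can only be trylocked here.  On failure, drop the blkcg
+		 * lock to let the queue lock holder make progress and retry.
+		 */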
+		if (spin_trylock(q->queue_lock)) {
+			blkg_destroy(blkg);
+			spin_unlock(q->queue_lock);
+		} else {
+			spin_unlock_irq(&blkcg->lock);
+			cpu_relax();
+			spin_lock_irq(&blkcg->lock);
+		}
+	}
+
+	spin_unlock_irq(&blkcg->lock);
+}
+
+static void blkcg_css_free(struct cgroup *cgroup)
+{
+	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+
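+	/* the root blkcg is statically allocated and is never freed */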
+	if (blkcg != &blkcg_root)
+		kfree(blkcg);
+}
+
+static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
+{
+	static atomic64_t id_seq = ATOMIC64_INIT(0);
+	struct blkcg *blkcg;
+	struct cgroup *parent = cgroup->parent;
+
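+	/* the root cgroup uses the statically allocated blkcg_root */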
+	if (!parent) {
+		blkcg = &blkcg_root;
+		goto done;
+	}
+
+	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
+	if (!blkcg)
+		return ERR_PTR(-ENOMEM);
+
+	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
+	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
+done:
+	spin_lock_init(&blkcg->lock);
+	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
+	INIT_HLIST_HEAD(&blkcg->blkg_list);
+
+	return &blkcg->css;
+}
+
+/**
+ * blkcg_init_queue - initialize blkcg part of request queue
+ * @q: request_queue to initialize
+ *
+ * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
+ * part of new request_queue @q.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int blkcg_init_queue(struct request_queue *q)
+{
+	might_sleep();
+
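+	/* blk-throttle is currently the only blkcg state set up per-queue */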
+	return blk_throtl_init(q);
+}
+
+/**
+ * blkcg_drain_queue - drain blkcg part of request_queue
+ * @q: request_queue to drain
+ *
+ * Called from blk_drain_queue(). Responsible for draining blkcg part.
+ */
+void blkcg_drain_queue(struct request_queue *q)
+{
+	lockdep_assert_held(q->queue_lock);
+
+	blk_throtl_drain(q);
+}
+
+/**
+ * blkcg_exit_queue - exit and release blkcg part of request_queue
+ * @q: request_queue being released
+ *
+ * Called from blk_release_queue(). Responsible for exiting blkcg part.
+ */
+void blkcg_exit_queue(struct request_queue *q)
+{
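+	/* destroy remaining blkgs before freeing per-queue throttle data */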
+	spin_lock_irq(q->queue_lock);
+	blkg_destroy_all(q);
+	spin_unlock_irq(q->queue_lock);
+
+	blk_throtl_exit(q);
+}
+
+/*
+ * We cannot support shared io contexts, as we have no means to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main cic data structures. For now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc.
+ */
+static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct io_context *ioc;
+	int ret = 0;
+
+	/* task_lock() is needed to avoid races with exit_io_context() */
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		task_lock(task);
+		ioc = task->io_context;
+		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
+			ret = -EINVAL;
+		task_unlock(task);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+struct cgroup_subsys blkio_subsys = {
+	.name = "blkio",
+	.css_alloc = blkcg_css_alloc,
+	.css_offline = blkcg_css_offline,
+	.css_free = blkcg_css_free,
+	.can_attach = blkcg_can_attach,
+	.subsys_id = blkio_subsys_id,
+	.base_cftypes = blkcg_files,
+	.module = THIS_MODULE,
+
+	/*
+	 * blkio subsystem is utterly broken in terms of hierarchy support.
+	 * It treats all cgroups equally regardless of where they're
+	 * located in the hierarchy - all cgroups are treated as if they're
+	 * right below the root. Fix it and remove the following.
+	 */
+	.broken_hierarchy = true,
+};
+EXPORT_SYMBOL_GPL(blkio_subsys);
+
+/**
+ * blkcg_activate_policy - activate a blkcg policy on a request_queue
+ * @q: request_queue of interest
+ * @pol: blkcg policy to activate
+ *
+ * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through
+ * bypass mode to populate its blkgs with policy_data for @pol.
+ *
+ * Activation happens with @q bypassed, so nobody would be accessing blkgs
+ * from IO path. Update of each blkg is protected by both queue and blkcg
+ * locks so that holding either lock and testing blkcg_policy_enabled() is
+ * always enough for dereferencing policy data.
+ *
+ * The caller is responsible for synchronizing [de]activations and policy
+ * [un]registrations. Returns 0 on success, -errno on failure.
+ */
+int blkcg_activate_policy(struct request_queue *q,
+			  const struct blkcg_policy *pol)
+{
+	LIST_HEAD(pds);
+	struct blkcg_gq *blkg;
+	struct blkg_policy_data *pd, *n;
+	int cnt = 0, ret;
+	bool preloaded;
+
+	if (blkcg_policy_enabled(q, pol))
+		return 0;
+
+	/* preallocations for root blkg */
+	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+	if (!blkg)
+		return -ENOMEM;
+
+	preloaded = !radix_tree_preload(GFP_KERNEL);
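+	/* preload so the blkg radix tree insert below need not allocate */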
+
+	blk_queue_bypass_start(q);
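+	/* with @q bypassed, the IO path cannot reach the blkgs being set up */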
+
+	/* make sure the root blkg exists and count the existing blkgs */
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
+	rcu_read_unlock();
+
+	if (preloaded)
+		radix_tree_preload_end();
|