/* * Common Block IO controller cgroup interface * * Based on ideas and code from CFQ, CFS and BFQ: * Copyright (C) 2003 Jens Axboe * * Copyright (C) 2008 Fabio Checconi * Paolo Valente * * Copyright (C) 2009 Vivek Goyal * Nauman Rafique */ #include #include #include #include #include #include #include #include #include #include "blk-cgroup.h" #include "blk.h" #define MAX_KEY_LEN 100 static DEFINE_MUTEX(blkcg_pol_mutex); struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { return pol && test_bit(pol->plid, q->blkcg_pols); } /** * blkg_free - free a blkg * @blkg: blkg to free * * Free @blkg which may be partially allocated. */ static void blkg_free(struct blkcg_gq *blkg) { int i; if (!blkg) return; for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd = blkg->pd[i]; if (!pd) continue; if (pol && pol->pd_exit_fn) pol->pd_exit_fn(blkg); kfree(pd); } blk_exit_rl(&blkg->rl); kfree(blkg); } /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @q: request_queue the new blkg is associated with * @gfp_mask: allocation mask to use * * Allocate a new blkg assocating @blkcg and @q. */ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, gfp_t gfp_mask) { struct blkcg_gq *blkg; int i; /* alloc and init base part */ blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); if (!blkg) return NULL; blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; blkg->refcnt = 1; /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { if (blk_init_rl(&blkg->rl, q, gfp_mask)) goto err_free; blkg->rl.blkg = blkg; } for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; if (!blkcg_policy_enabled(q, pol)) continue; /* alloc per-policy data and attach it to blkg */ pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); if (!pd) goto err_free; blkg->pd[i] = pd; pd->blkg = blkg; /* invoke per-policy init */ if (blkcg_policy_enabled(blkg->q, pol)) pol->pd_init_fn(blkg); } return blkg; err_free: blkg_free(blkg); return NULL; } static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { struct blkcg_gq *blkg; blkg = rcu_dereference(blkcg->blkg_hint); if (blkg && blkg->q == q) return blkg; /* * Hint didn't match. Look up from the radix tree. Note that we * may not be holding queue_lock and thus are not sure whether * @blkg from blkg_tree has already been removed or not, so we * can't update hint to the lookup result. Leave it to the caller. */ blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); if (blkg && blkg->q == q) return blkg; return NULL; } /** * blkg_lookup - lookup blkg for the specified blkcg - q pair * @blkcg: blkcg of interest * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. This function should be called * under RCU read lock and is guaranteed to return %NULL if @q is bypassing * - see blk_queue_bypass_start() for details. */ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(blk_queue_bypass(q))) return NULL; return __blkg_lookup(blkcg, q); } EXPORT_SYMBOL_GPL(blkg_lookup); /* * If @new_blkg is %NULL, this function tries to allocate a new one as * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. */ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q, struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; int ret; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); /* lookup and update hint on success, see __blkg_lookup() for details */ blkg = __blkg_lookup(blkcg, q); if (blkg) { rcu_assign_pointer(blkcg->blkg_hint, blkg); goto out_free; } /* blkg holds a reference to blkcg */ if (!css_tryget(&blkcg->css)) { blkg = ERR_PTR(-EINVAL); goto out_free; } /* allocate */ if (!new_blkg) { new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); if (unlikely(!new_blkg)) { blkg = ERR_PTR(-ENOMEM); goto out_put; } } blkg = new_blkg; /* insert */ spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); } spin_unlock(&blkcg->lock); if (!ret) return blkg; blkg = ERR_PTR(ret); out_put: css_put(&blkcg->css); out_free: blkg_free(new_blkg); return blkg; } struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) { /* * This could be the first entry point of blkcg implementation and * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); return __blkg_lookup_create(blkcg, q, NULL); } EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); /* Something wrong if we are trying to remove same group twice */ WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); /* * Both setting lookup hint to and clearing it from @blkg are done * under queue_lock. If it's not pointing to @blkg now, it never * will. Hint assignment itself can race safely. */ if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ blkg_put(blkg); } /** * blkg_destroy_all - destroy all blkgs associated with a request_queue * @q: request_queue of interest * * Destroy all blkgs associated with @q. */ static void blkg_destroy_all(struct request_queue *q) { struct blkcg_gq *blkg, *n; lockdep_assert_held(q->queue_lock); list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); blkg_destroy(blkg); spin_unlock(&blkcg->lock); } /* * root blkg is destroyed. Just clear the pointer since * root_rl does not take reference on root blkg. */ q->root_blkg = NULL; q->root_rl.blkg = NULL; } static void blkg_rcu_free(struct rcu_head *rcu_head) { blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head)); } void __blkg_release(struct blkcg_gq *blkg) { /* release the extra blkcg reference this blkg has been holding */ css_put(&blkg->blkcg->css); /* * A group is freed in rcu manner. But having an rcu lock does not * mean that one can access all the fields of blkg and assume these * are valid. For example, don't try to follow throtl_data and * request queue links. * * Having a reference to blkg under an rcu allows acess to only * values local to groups like group stats and group rate limits */ call_rcu(&blkg->rcu_head, blkg_rcu_free); } EXPORT_SYMBOL_GPL(__blkg_release); /* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. */ struct request_list *__blk_queue_next_rl(struct request_list *rl, struct request_queue *q) { struct list_head *ent; struct blkcg_gq *blkg; /* * Determine the current blkg list_head. The first entry is * root_rl which is off @q->blkg_list and mapped to the head. */ if (rl == &q->root_rl) { ent = &q->blkg_list; /* There are no more block groups, hence no request lists */ if (list_empty(ent)) return NULL; } else { blkg = container_of(rl, struct blkcg_gq, rl); ent = &blkg->q_node; } /* walk to the next list_head, skip root blkcg */ ent = ent->next; if (ent == &q->root_blkg->q_node) ent = ent->next; if (ent == &q->blkg_list) return NULL; blkg = container_of(ent, struct blkcg_gq, q_node); return &blkg->rl;