@@ -127,3 +127,187 @@ err_free:
+static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+				      struct request_queue *q)
+{
+	struct blkcg_gq *blkg;
+
+	blkg = rcu_dereference(blkcg->blkg_hint);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	/*
+	 * Hint didn't match. Look up from the radix tree. Note that we
+	 * may not be holding queue_lock and thus are not sure whether
+	 * @blkg from blkg_tree has already been removed or not, so we
+	 * can't update hint to the lookup result. Leave it to the caller.
+	 */
+	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	return NULL;
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair. This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(blk_queue_bypass(q)))
+		return NULL;
+	return __blkg_lookup(blkcg, q);
+}
+EXPORT_SYMBOL_GPL(blkg_lookup);
+
+/*
+ * If @new_blkg is %NULL, this function tries to allocate a new one as
+ * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
+ */
+static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+					     struct request_queue *q,
+					     struct blkcg_gq *new_blkg)
+{
+	struct blkcg_gq *blkg;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	lockdep_assert_held(q->queue_lock);
+
+	/* lookup and update hint on success, see __blkg_lookup() for details */
+	blkg = __blkg_lookup(blkcg, q);
+	if (blkg) {
+		rcu_assign_pointer(blkcg->blkg_hint, blkg);
+		goto out_free;
+	}
+
+	/* blkg holds a reference to blkcg */
+	if (!css_tryget(&blkcg->css)) {
+		blkg = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	/* allocate */
+	if (!new_blkg) {
+		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
+		if (unlikely(!new_blkg)) {
+			blkg = ERR_PTR(-ENOMEM);
+			goto out_put;
+		}
+	}
+	blkg = new_blkg;
+
+	/* insert */
+	spin_lock(&blkcg->lock);
+	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
+	if (likely(!ret)) {
+		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
+		list_add(&blkg->q_node, &q->blkg_list);
+	}
+	spin_unlock(&blkcg->lock);
+
+	if (!ret)
+		return blkg;
+
+	blkg = ERR_PTR(ret);
+out_put:
+	css_put(&blkcg->css);
+out_free:
+	blkg_free(new_blkg);
+	return blkg;
+}
+
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+				    struct request_queue *q)
+{
+	/*
+	 * This could be the first entry point of blkcg implementation and
+	 * we shouldn't allow anything to go through for a bypassing queue.
+	 */
+	if (unlikely(blk_queue_bypass(q)))
+		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
+	return __blkg_lookup_create(blkcg, q, NULL);
+}
+EXPORT_SYMBOL_GPL(blkg_lookup_create);
+
+static void blkg_destroy(struct blkcg_gq *blkg)
+{
+	struct blkcg *blkcg = blkg->blkcg;
+
+	lockdep_assert_held(blkg->q->queue_lock);
+	lockdep_assert_held(&blkcg->lock);
+
+	/* Something wrong if we are trying to remove same group twice */
+	WARN_ON_ONCE(list_empty(&blkg->q_node));
+	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
+
+	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
+	list_del_init(&blkg->q_node);
+	hlist_del_init_rcu(&blkg->blkcg_node);
+
+	/*
+	 * Both setting lookup hint to and clearing it from @blkg are done
+	 * under queue_lock. If it's not pointing to @blkg now, it never
+	 * will. Hint assignment itself can race safely.
+	 */
+	if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
+		rcu_assign_pointer(blkcg->blkg_hint, NULL);
+
+	/*
+	 * Put the reference taken at the time of creation so that when all
+	 * queues are gone, group can be destroyed.
+	 */
+	blkg_put(blkg);
+}
+
+/**
+ * blkg_destroy_all - destroy all blkgs associated with a request_queue
+ * @q: request_queue of interest
+ *
+ * Destroy all blkgs associated with @q.
+ */
+static void blkg_destroy_all(struct request_queue *q)
+{
+	struct blkcg_gq *blkg, *n;
+
+	lockdep_assert_held(q->queue_lock);
+
+	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
+		struct blkcg *blkcg = blkg->blkcg;
+
+		spin_lock(&blkcg->lock);
+		blkg_destroy(blkg);
+		spin_unlock(&blkcg->lock);
+	}
+
+	/*
+	 * root blkg is destroyed. Just clear the pointer since
+	 * root_rl does not take reference on root blkg.
+	 */
+	q->root_blkg = NULL;
+	q->root_rl.blkg = NULL;
+}
+
+static void blkg_rcu_free(struct rcu_head *rcu_head)
+{
+	blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
+}
+
+void __blkg_release(struct blkcg_gq *blkg)
+{
+	/* release the extra blkcg reference this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+
+	/*
+	 * A group is freed in rcu manner. But having an rcu lock does not
+	 * mean that one can access all the fields of blkg and assume these
+	 * are valid. For example, don't try to follow throtl_data and
+	 * request queue links.
+	 *
+	 * Having a reference to blkg under an rcu allows access to only