@@ -1086,3 +1086,189 @@ static struct cftype throtl_files[] = {
 		.read_seq_string = tg_print_cpu_rwstat,
 	},
 	{ }	/* terminate */
+};
+
+static void throtl_shutdown_wq(struct request_queue *q)
+{
+	struct throtl_data *td = q->td;
+
+	cancel_delayed_work_sync(&td->throtl_work);
+}
+
+static struct blkcg_policy blkcg_policy_throtl = {
+	.pd_size		= sizeof(struct throtl_grp),
+	.cftypes		= throtl_files,
+
+	.pd_init_fn		= throtl_pd_init,
+	.pd_exit_fn		= throtl_pd_exit,
+	.pd_reset_stats_fn	= throtl_pd_reset_stats,
+};
+
+bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
+{
+	struct throtl_data *td = q->td;
+	struct throtl_grp *tg;
+	bool rw = bio_data_dir(bio), update_disptime = true;
+	struct blkcg *blkcg;
+	bool throttled = false;
+
+	if (bio->bi_rw & REQ_THROTTLED) {
+		bio->bi_rw &= ~REQ_THROTTLED;
+		goto out;
+	}
+
+	/*
+	 * A throtl_grp pointer retrieved under rcu can be used to access
+	 * basic fields like stats and io rates. If a group has no rules,
+	 * just update the dispatch stats in lockless manner and return.
+	 */
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+	tg = throtl_lookup_tg(td, blkcg);
+	if (tg) {
+		if (tg_no_rule_group(tg, rw)) {
+			throtl_update_dispatch_stats(tg_to_blkg(tg),
+						     bio->bi_size, bio->bi_rw);
+			goto out_unlock_rcu;
+		}
+	}
+
+	/*
+	 * Either group has not been allocated yet or it is not an unlimited
+	 * IO group
+	 */
+	spin_lock_irq(q->queue_lock);
+	tg = throtl_lookup_create_tg(td, blkcg);
+	if (unlikely(!tg))
+		goto out_unlock;
+
+	if (tg->nr_queued[rw]) {
+		/*
+		 * There is already another bio queued in same dir. No
+		 * need to update dispatch time.
+		 */
+		update_disptime = false;
+		goto queue_bio;
+
+	}
+
+	/* Bio is within rate limit of group */
+	if (tg_may_dispatch(td, tg, bio, NULL)) {
+		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
+		goto out_unlock;
+	}
+
+queue_bio:
+	throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
+			" iodisp=%u iops=%u queued=%d/%d",
+			rw == READ ? 'R' : 'W',
+			tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
+			tg->io_disp[rw], tg->iops[rw],
+			tg->nr_queued[READ], tg->nr_queued[WRITE]);
+
+	bio_associate_current(bio);
+	throtl_add_bio_tg(q->td, tg, bio);
+	throttled = true;
+
+	if (update_disptime) {
+		tg_update_disptime(td, tg);
+		throtl_schedule_next_dispatch(td);
+	}
+
+out_unlock:
+	spin_unlock_irq(q->queue_lock);
+out_unlock_rcu:
+	rcu_read_unlock();
+out:
+	return throttled;
+}
+
+/**
+ * blk_throtl_drain - drain throttled bios
+ * @q: request_queue to drain throttled bios for
+ *
+ * Dispatch all currently throttled bios on @q through ->make_request_fn().
+ */
+void blk_throtl_drain(struct request_queue *q)
+	__releases(q->queue_lock) __acquires(q->queue_lock)
+{
+	struct throtl_data *td = q->td;
+	struct throtl_rb_root *st = &td->tg_service_tree;
+	struct throtl_grp *tg;
+	struct bio_list bl;
+	struct bio *bio;
+
+	queue_lockdep_assert_held(q);
+
+	bio_list_init(&bl);
+
+	while ((tg = throtl_rb_first(st))) {
+		throtl_dequeue_tg(td, tg);
+
+		while ((bio = bio_list_peek(&tg->bio_lists[READ])))
+			tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+		while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
+			tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+	}
+	spin_unlock_irq(q->queue_lock);
+
+	while ((bio = bio_list_pop(&bl)))
+		generic_make_request(bio);
+
+	spin_lock_irq(q->queue_lock);
+}
+
+int blk_throtl_init(struct request_queue *q)
+{
+	struct throtl_data *td;
+	int ret;
+
+	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
+	if (!td)
+		return -ENOMEM;
+
+	td->tg_service_tree = THROTL_RB_ROOT;
+	td->limits_changed = false;
+	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
+
+	q->td = td;
+	td->queue = q;
+
+	/* activate policy */
+	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
+	if (ret)
+		kfree(td);
+	return ret;
+}
+
+void blk_throtl_exit(struct request_queue *q)
+{
+	BUG_ON(!q->td);
+	throtl_shutdown_wq(q);
+	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	kfree(q->td);
+}
+
+static int __init throtl_init(void)
+{
+	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
+	if (!kthrotld_workqueue)
+		panic("Failed to create kthrotld\n");
+
+	return blkcg_policy_register(&blkcg_policy_throtl);
+}
+
+module_init(throtl_init);