|  | @@ -1086,3 +1086,189 @@ static struct cftype throtl_files[] = {
 | 
	
		
			
				|  |  |  		.read_seq_string = tg_print_cpu_rwstat,
 | 
	
		
			
				|  |  |  	},
 | 
	
		
			
				|  |  |  	{ }	/* terminate */
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static void throtl_shutdown_wq(struct request_queue *q)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct throtl_data *td = q->td;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	cancel_delayed_work_sync(&td->throtl_work);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static struct blkcg_policy blkcg_policy_throtl = {
 | 
	
		
			
				|  |  | +	.pd_size		= sizeof(struct throtl_grp),
 | 
	
		
			
				|  |  | +	.cftypes		= throtl_files,
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	.pd_init_fn		= throtl_pd_init,
 | 
	
		
			
				|  |  | +	.pd_exit_fn		= throtl_pd_exit,
 | 
	
		
			
				|  |  | +	.pd_reset_stats_fn	= throtl_pd_reset_stats,
 | 
	
		
			
				|  |  | +};
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct throtl_data *td = q->td;
 | 
	
		
			
				|  |  | +	struct throtl_grp *tg;
 | 
	
		
			
				|  |  | +	bool rw = bio_data_dir(bio), update_disptime = true;
 | 
	
		
			
				|  |  | +	struct blkcg *blkcg;
 | 
	
		
			
				|  |  | +	bool throttled = false;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (bio->bi_rw & REQ_THROTTLED) {
 | 
	
		
			
				|  |  | +		bio->bi_rw &= ~REQ_THROTTLED;
 | 
	
		
			
				|  |  | +		goto out;
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/*
 | 
	
		
			
				|  |  | +	 * A throtl_grp pointer retrieved under rcu can be used to access
 | 
	
		
			
				|  |  | +	 * basic fields like stats and io rates. If a group has no rules,
 | 
	
		
			
				|  |  | +	 * just update the dispatch stats in lockless manner and return.
 | 
	
		
			
				|  |  | +	 */
 | 
	
		
			
				|  |  | +	rcu_read_lock();
 | 
	
		
			
				|  |  | +	blkcg = bio_blkcg(bio);
 | 
	
		
			
				|  |  | +	tg = throtl_lookup_tg(td, blkcg);
 | 
	
		
			
				|  |  | +	if (tg) {
 | 
	
		
			
				|  |  | +		if (tg_no_rule_group(tg, rw)) {
 | 
	
		
			
				|  |  | +			throtl_update_dispatch_stats(tg_to_blkg(tg),
 | 
	
		
			
				|  |  | +						     bio->bi_size, bio->bi_rw);
 | 
	
		
			
				|  |  | +			goto out_unlock_rcu;
 | 
	
		
			
				|  |  | +		}
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/*
 | 
	
		
			
				|  |  | +	 * Either group has not been allocated yet or it is not an unlimited
 | 
	
		
			
				|  |  | +	 * IO group
 | 
	
		
			
				|  |  | +	 */
 | 
	
		
			
				|  |  | +	spin_lock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +	tg = throtl_lookup_create_tg(td, blkcg);
 | 
	
		
			
				|  |  | +	if (unlikely(!tg))
 | 
	
		
			
				|  |  | +		goto out_unlock;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (tg->nr_queued[rw]) {
 | 
	
		
			
				|  |  | +		/*
 | 
	
		
			
				|  |  | +		 * There is already another bio queued in same dir. No
 | 
	
		
			
				|  |  | +		 * need to update dispatch time.
 | 
	
		
			
				|  |  | +		 */
 | 
	
		
			
				|  |  | +		update_disptime = false;
 | 
	
		
			
				|  |  | +		goto queue_bio;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/* Bio is with-in rate limit of group */
 | 
	
		
			
				|  |  | +	if (tg_may_dispatch(td, tg, bio, NULL)) {
 | 
	
		
			
				|  |  | +		throtl_charge_bio(tg, bio);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +		/*
 | 
	
		
			
				|  |  | +		 * We need to trim slice even when bios are not being queued
 | 
	
		
			
				|  |  | +		 * otherwise it might happen that a bio is not queued for
 | 
	
		
			
				|  |  | +		 * a long time and slice keeps on extending and trim is not
 | 
	
		
			
				|  |  | +		 * called for a long time. Now if limits are reduced suddenly
 | 
	
		
			
				|  |  | +		 * we take into account all the IO dispatched so far at new
 | 
	
		
			
				|  |  | +		 * low rate and * newly queued IO gets a really long dispatch
 | 
	
		
			
				|  |  | +		 * time.
 | 
	
		
			
				|  |  | +		 *
 | 
	
		
			
				|  |  | +		 * So keep on trimming slice even if bio is not queued.
 | 
	
		
			
				|  |  | +		 */
 | 
	
		
			
				|  |  | +		throtl_trim_slice(td, tg, rw);
 | 
	
		
			
				|  |  | +		goto out_unlock;
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +queue_bio:
 | 
	
		
			
				|  |  | +	throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
 | 
	
		
			
				|  |  | +			" iodisp=%u iops=%u queued=%d/%d",
 | 
	
		
			
				|  |  | +			rw == READ ? 'R' : 'W',
 | 
	
		
			
				|  |  | +			tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
 | 
	
		
			
				|  |  | +			tg->io_disp[rw], tg->iops[rw],
 | 
	
		
			
				|  |  | +			tg->nr_queued[READ], tg->nr_queued[WRITE]);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	bio_associate_current(bio);
 | 
	
		
			
				|  |  | +	throtl_add_bio_tg(q->td, tg, bio);
 | 
	
		
			
				|  |  | +	throttled = true;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	if (update_disptime) {
 | 
	
		
			
				|  |  | +		tg_update_disptime(td, tg);
 | 
	
		
			
				|  |  | +		throtl_schedule_next_dispatch(td);
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +out_unlock:
 | 
	
		
			
				|  |  | +	spin_unlock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +out_unlock_rcu:
 | 
	
		
			
				|  |  | +	rcu_read_unlock();
 | 
	
		
			
				|  |  | +out:
 | 
	
		
			
				|  |  | +	return throttled;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + * blk_throtl_drain - drain throttled bios
 | 
	
		
			
				|  |  | + * @q: request_queue to drain throttled bios for
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Dispatch all currently throttled bios on @q through ->make_request_fn().
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +void blk_throtl_drain(struct request_queue *q)
 | 
	
		
			
				|  |  | +	__releases(q->queue_lock) __acquires(q->queue_lock)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct throtl_data *td = q->td;
 | 
	
		
			
				|  |  | +	struct throtl_rb_root *st = &td->tg_service_tree;
 | 
	
		
			
				|  |  | +	struct throtl_grp *tg;
 | 
	
		
			
				|  |  | +	struct bio_list bl;
 | 
	
		
			
				|  |  | +	struct bio *bio;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	queue_lockdep_assert_held(q);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	bio_list_init(&bl);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	while ((tg = throtl_rb_first(st))) {
 | 
	
		
			
				|  |  | +		throtl_dequeue_tg(td, tg);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +		while ((bio = bio_list_peek(&tg->bio_lists[READ])))
 | 
	
		
			
				|  |  | +			tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
 | 
	
		
			
				|  |  | +		while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
 | 
	
		
			
				|  |  | +			tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
 | 
	
		
			
				|  |  | +	}
 | 
	
		
			
				|  |  | +	spin_unlock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	while ((bio = bio_list_pop(&bl)))
 | 
	
		
			
				|  |  | +		generic_make_request(bio);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	spin_lock_irq(q->queue_lock);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +int blk_throtl_init(struct request_queue *q)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	struct throtl_data *td;
 | 
	
		
			
				|  |  | +	int ret;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 | 
	
		
			
				|  |  | +	if (!td)
 | 
	
		
			
				|  |  | +		return -ENOMEM;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	td->tg_service_tree = THROTL_RB_ROOT;
 | 
	
		
			
				|  |  | +	td->limits_changed = false;
 | 
	
		
			
				|  |  | +	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	q->td = td;
 | 
	
		
			
				|  |  | +	td->queue = q;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	/* activate policy */
 | 
	
		
			
				|  |  | +	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
 | 
	
		
			
				|  |  | +	if (ret)
 | 
	
		
			
				|  |  | +		kfree(td);
 | 
	
		
			
				|  |  | +	return ret;
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +void blk_throtl_exit(struct request_queue *q)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	BUG_ON(!q->td);
 | 
	
		
			
				|  |  | +	throtl_shutdown_wq(q);
 | 
	
		
			
				|  |  | +	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
 | 
	
		
			
				|  |  | +	kfree(q->td);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +static int __init throtl_init(void)
 | 
	
		
			
				|  |  | +{
 | 
	
		
			
				|  |  | +	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
 | 
	
		
			
				|  |  | +	if (!kthrotld_workqueue)
 | 
	
		
			
				|  |  | +		panic("Failed to create kthrotld\n");
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +	return blkcg_policy_register(&blkcg_policy_throtl);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +module_init(throtl_init);
 |