| 
					
				 | 
			
			
				@@ -0,0 +1,144 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+/* 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * Interface for controlling IO bandwidth on a request queue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include <linux/module.h> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include <linux/slab.h> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include <linux/blkdev.h> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include <linux/bio.h> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include <linux/blktrace_api.h> 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include "blk-cgroup.h" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#include "blk.h" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;

/* Total max dispatch from all groups in one round */
static int throtl_quantum = 32;

/* Throttling is performed over 100ms slice and after that slice is renewed */
static unsigned long throtl_slice = HZ/10;	/* 100 ms */

/* Policy descriptor for blk-throttle; defined/registered later in this file */
static struct blkcg_policy blkcg_policy_throtl;

/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;

/* Forward declaration: schedules the dispatch work for @td after @delay jiffies */
static void throtl_schedule_delayed_work(struct throtl_data *td,
				unsigned long delay);
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/*
 * Service tree of active throttle groups.  Groups are keyed by their
 * dispatch time (disptime); the "left"/"min_disptime" fields look like a
 * cache of the leftmost (earliest) node and its key so the next group to
 * dispatch can be found without walking the tree — the maintaining code
 * is outside this chunk, so confirm against the insert/erase helpers.
 */
struct throtl_rb_root {
	struct rb_root rb;
	struct rb_node *left;		/* presumably cached leftmost node */
	unsigned int count;		/* number of groups on the tree */
	unsigned long min_disptime;	/* presumably key of leftmost node */
};

/* Initializer for an empty service tree (compound-literal rvalue) */
#define THROTL_RB_ROOT	(struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \
			.count = 0, .min_disptime = 0}

/* Convert a service-tree rb_node back to its containing throtl_grp */
#define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/*
 * Per-cpu group stats.  One instance per CPU is hung off
 * throtl_grp->stats_cpu (allocated asynchronously, see
 * tg_stats_alloc_list below).
 */
struct tg_stats_cpu {
	/* total bytes transferred */
	struct blkg_rwstat		service_bytes;
	/* total IOs serviced, post merge */
	struct blkg_rwstat		serviced;
};
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/*
 * Per-cgroup, per-queue throttle group.  Holds the configured bps/iops
 * limits for a cgroup on one request queue, the bios currently queued
 * against those limits, and the accounting for the current time slice.
 */
struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_tree member */
	struct rb_node rb_node;

	/*
	 * Dispatch time in jiffies. This is the estimated time when group
	 * will unthrottle and is ready to dispatch more bio. It is used as
	 * key to sort active groups in service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* Two lists for READ and WRITE */
	struct bio_list bio_lists[2];

	/* Number of queued bios on READ and WRITE lists */
	unsigned int nr_queued[2];

	/* bytes per second rate limits */
	uint64_t bps[2];

	/* IOPS limits */
	unsigned int iops[2];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	/* Some throttle limits got updated for the group */
	int limits_changed;

	/* Per cpu stats pointer */
	struct tg_stats_cpu __percpu *stats_cpu;

	/* List of tgs waiting for per cpu stats memory to be allocated */
	struct list_head stats_alloc_node;
};
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/*
 * Per-request-queue throttling state: the service tree of active
 * groups, aggregate queued-bio counts, and the delayed work item that
 * dispatches throttled bios when their time arrives.
 */
struct throtl_data
{
	/* service tree for active throtl groups */
	struct throtl_rb_root tg_service_tree;

	/* the queue this throttle data belongs to */
	struct request_queue *queue;

	/* Total Number of queued bios on READ and WRITE lists */
	unsigned int nr_queued[2];

	/*
	 * number of total undestroyed groups
	 */
	unsigned int nr_undestroyed_grps;

	/* Work for dispatching throttled bios */
	struct delayed_work throtl_work;

	/* set when any group's limits changed; mirrors tg->limits_changed */
	int limits_changed;
};
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
/* list and work item to allocate percpu group stats */
static DEFINE_SPINLOCK(tg_stats_alloc_lock);
static LIST_HEAD(tg_stats_alloc_list);

/*
 * NOTE(review): groups apparently park on tg_stats_alloc_list (via
 * tg->stats_alloc_node) and this delayed work allocates their percpu
 * stats later — presumably because allocation can't happen in the
 * context that creates the group.  Confirm against tg_stats_alloc_fn.
 */
static void tg_stats_alloc_fn(struct work_struct *);
static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn);
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+	return pd ? container_of(pd, struct throtl_grp, pd) : NULL; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+	return pd_to_blkg(&tg->pd); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+static inline struct throtl_grp *td_root_tg(struct throtl_data *td) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+	return blkg_to_tg(td->queue->root_blkg); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 |