@@ -320,3 +320,172 @@ static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root)
 	if (root->left)
 		return rb_entry_tg(root->left);
+
+	return NULL;
+}
+
+static void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+
+static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root)
+{
+	if (root->left == n)
+		root->left = NULL;
+	rb_erase_init(n, &root->rb);
+	--root->count;
+}
+
+static void update_min_dispatch_time(struct throtl_rb_root *st)
+{
+	struct throtl_grp *tg;
+
+	tg = throtl_rb_first(st);
+	if (!tg)
+		return;
+
+	st->min_disptime = tg->disptime;
+}
+
+static void
+tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg)
+{
+	struct rb_node **node = &st->rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct throtl_grp *__tg;
+	unsigned long key = tg->disptime;
+	int left = 1;
+
+	while (*node != NULL) {
+		parent = *node;
+		__tg = rb_entry_tg(parent);
+
+		if (time_before(key, __tg->disptime))
+			node = &parent->rb_left;
+		else {
+			node = &parent->rb_right;
+			left = 0;
+		}
+	}
+
+	if (left)
+		st->left = &tg->rb_node;
+
+	rb_link_node(&tg->rb_node, parent, node);
+	rb_insert_color(&tg->rb_node, &st->rb);
+}
+
+static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg)
+{
+	struct throtl_rb_root *st = &td->tg_service_tree;
+
+	tg_service_tree_add(st, tg);
+	throtl_mark_tg_on_rr(tg);
+	st->count++;
+}
+
+static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg)
+{
+	if (!throtl_tg_on_rr(tg))
+		__throtl_enqueue_tg(td, tg);
+}
+
+static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg)
+{
+	throtl_rb_erase(&tg->rb_node, &td->tg_service_tree);
+	throtl_clear_tg_on_rr(tg);
+}
+
+static void throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg)
+{
+	if (throtl_tg_on_rr(tg))
+		__throtl_dequeue_tg(td, tg);
+}
+
+static void throtl_schedule_next_dispatch(struct throtl_data *td)
+{
+	struct throtl_rb_root *st = &td->tg_service_tree;
+
+	/*
+	 * If there are more bios pending, schedule more work.
+	 */
+	if (!total_nr_queued(td))
+		return;
+
+	BUG_ON(!st->count);
+
+	update_min_dispatch_time(st);
+
+	if (time_before_eq(st->min_disptime, jiffies))
+		throtl_schedule_delayed_work(td, 0);
+	else
+		throtl_schedule_delayed_work(td, (st->min_disptime - jiffies));
+}
+
+static inline void
+throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+{
+	tg->bytes_disp[rw] = 0;
+	tg->io_disp[rw] = 0;
+	tg->slice_start[rw] = jiffies;
+	tg->slice_end[rw] = jiffies + throtl_slice;
+	throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu",
+			rw == READ ? 'R' : 'W', tg->slice_start[rw],
+			tg->slice_end[rw], jiffies);
+}
+
+static inline void throtl_set_slice_end(struct throtl_data *td,
+		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+}
+
+static inline void throtl_extend_slice(struct throtl_data *td,
+		struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+{
+	tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
+	throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu",
+			rw == READ ? 'R' : 'W', tg->slice_start[rw],
+			tg->slice_end[rw], jiffies);
+}
+
+/* Determine whether the previously allocated or extended slice is complete */
+static bool
+throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+{
+	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
+		return false;
+
+	return true;
+}
+
+/* Trim the used slices and adjust slice start accordingly */
+static inline void
+throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+{
+	unsigned long nr_slices, time_elapsed, io_trim;
+	u64 bytes_trim, tmp;
+
+	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
+
+	/*
+	 * If bps is unlimited (-1), the time slice doesn't get renewed.
+	 * Don't try to trim the slice if the slice is already used up. A
+	 * new slice will start when appropriate.
+	 */
+	if (throtl_slice_used(td, tg, rw))
+		return;
+
+	/*
+	 * A bio has been dispatched, so also adjust slice_end. It might
+	 * happen that the cgroup limit was initially very low, resulting
+	 * in a high slice_end, but the limit was later bumped up and the
+	 * bio was dispatched sooner; then we need to reduce slice_end. A
+	 * bogus, too-high slice_end is bad because it does not allow a
+	 * new slice to start.
+	 */
+	throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);
+
+	time_elapsed = jiffies - tg->slice_start[rw];
|