@@ -0,0 +1,139 @@
+/*
+ * CFQ, or complete fairness queueing, disk scheduler.
+ *
+ * Based on ideas from a previously unfinished io
+ * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
+ *
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
+#include "blk.h"
+#include "blk-cgroup.h"
+
+/*
+ * tunables
+ */
|
|
|
+/* max number of requests a queue may dispatch in one round of service */
+static const int cfq_quantum = 8;
|
|
|
+static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+/* maximum backwards seek, in KiB */
+static const int cfq_back_max = 16 * 1024;
+/* penalty of a backwards seek */
+static const int cfq_back_penalty = 2;
+static const int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 25;
+static const int cfq_slice_async_rq = 2;
+static int cfq_slice_idle = HZ / 125;
+static int cfq_group_idle = HZ / 125;
+static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static const int cfq_hist_divisor = 4;
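+/*
+ * Editorial note, not part of the original patch: the jiffy-based
+ * tunables above correspond to fixed wall-clock values regardless of HZ:
+ * cfq_fifo_expire = { HZ/4 = 250ms, HZ/8 = 125ms } (indexed by request
+ * sync-ness elsewhere in the file), cfq_slice_sync = 100ms,
+ * cfq_slice_async = 40ms, cfq_slice_idle = cfq_group_idle = 8ms and
+ * cfq_target_latency = 300ms. cfq_hist_divisor appears to be the divisor
+ * for the rolling average of busy queue counts.
+ */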
|
|
|
+
+/*
+ * offset from end of service tree
+ */
+#define CFQ_IDLE_DELAY		(HZ / 5)
+
+/*
+ * below this threshold, we consider thinktime immediate
+ */
+#define CFQ_MIN_TT		(2)
+
+#define CFQ_SLICE_SCALE		(5)
+#define CFQ_HW_QUEUE_MIN	(5)
+#define CFQ_SERVICE_SHIFT	12
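+/*
+ * Editorial note, not part of the original patch: as read from the rest
+ * of the file, CFQ_SLICE_SCALE controls how much a queue's time slice
+ * grows per ioprio level, CFQ_HW_QUEUE_MIN is the queue-depth threshold
+ * used when detecting whether the device does its own command queueing,
+ * and CFQ_SERVICE_SHIFT is the fixed-point shift used when scaling group
+ * service time (vdisktime) by weight.
+ */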
|
|
|
+
+#define CFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define CFQQ_CLOSE_THR		(sector_t)(8 * 1024)
+#define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
+#define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
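+/*
+ * Editorial note, not part of the original patch: seek_history appears to
+ * be maintained as a 32-bit shift register with one bit per completed
+ * request, where a set bit marks a "seeky" request (seek distance above
+ * CFQQ_SEEK_THR sectors, or transfer size below CFQQ_SECT_THR_NONROT
+ * sectors on non-rotational devices). CFQQ_SEEKY() then flags a queue
+ * once more than 32/8 = 4 of its last 32 requests were seeky.
+ */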
|
|
|
+
+#define RQ_CIC(rq)		icq_to_cic((rq)->elv.icq)
+#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elv.priv[0])
+#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])
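+/*
+ * Editorial note, not part of the original patch: these accessors pull
+ * CFQ's per-request state back out of the request's elevator data: the
+ * io_cq linking the request to its submitting task's io context, and the
+ * cfq_queue and cfq_group pointers stashed in elv.priv[0] and elv.priv[1]
+ * when the request was set up.
+ */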
|
|
|
+
+static struct kmem_cache *cfq_pool;
+
+#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
+#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
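+/*
+ * Editorial note, not part of the original patch: IOPRIO_BE_NR is 8, so
+ * CFQ keeps one priority list per best-effort level (0 is the highest
+ * priority, 7 the lowest); the cfq_class_*() helpers test a queue's
+ * scheduling class, not its level within the class.
+ */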
|
|
|
+
+#define sample_valid(samples)	((samples) > 80)
+#define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)
+
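+/*
+ * Editorial note, not part of the original patch: cfq_ttime tracks
+ * "think time", the gap between one request completing and the next one
+ * arriving from the same context. ttime_mean is a decaying average of
+ * ttime_total over ttime_samples, and sample_valid() above is used to
+ * require enough samples before acting on it.
+ */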
|
|
|
+struct cfq_ttime {
+	unsigned long last_end_request;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+};
|
|
|
+
+/*
+ * Most of our rbtree usage is for sorting with min extraction, so
+ * if we cache the leftmost node we don't have to walk down the tree
+ * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should
+ * move this into the elevator for the rq sorting as well.
+ */
+struct cfq_rb_root {
+	struct rb_root rb;
+	struct rb_node *left;
+	unsigned count;
+	unsigned total_weight;
+	u64 min_vdisktime;
+	struct cfq_ttime ttime;
+};
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, \
+			.ttime = {.last_end_request = jiffies,},}
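+/*
+ * Editorial sketch, not part of the original patch: with the cached
+ * ->left pointer, min extraction only has to walk the tree when the
+ * cache has been invalidated. A lookup helper built on the fields above
+ * could look roughly like the following; the name cfq_rb_first_node is
+ * illustrative, not taken from this patch.
+ */
+#if 0	/* illustrative sketch only */
+static struct rb_node *cfq_rb_first_node(struct cfq_rb_root *root)
+{
+	if (!root->left)
+		root->left = rb_first(&root->rb);
+
+	return root->left;
+}
+#endif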
|
|
|
+
+/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	int ref;
+	/* various state flags, see below */
+	unsigned int flags;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	/* time when queue got scheduled in to dispatch first request. */
+	unsigned long dispatch_start;
+	unsigned int allocated_slice;
+	unsigned int slice_dispatch;
+	/* time when first request from queue completed and slice started. */
+	unsigned long slice_start;
+	unsigned long slice_end;
+	long slice_resid;
+
+	/* pending priority requests */
+	int prio_pending;
+	/* number of requests that are on the dispatch list or inside driver */
+	int dispatched;
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
|