/* * CFQ, or complete fairness queueing, disk scheduler. * * Based on ideas from a previously unfinished io * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. * * Copyright (C) 2003 Jens Axboe */ #include #include #include #include #include #include #include #include #include "blk.h" #include "blk-cgroup.h" /* * tunables */ /* max queue in one round of service */ static const int cfq_quantum = 8; static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; /* maximum backwards seek, in KiB */ static const int cfq_back_max = 16 * 1024; /* penalty of a backwards seek */ static const int cfq_back_penalty = 2; static const int cfq_slice_sync = HZ / 10; static int cfq_slice_async = HZ / 25; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = HZ / 125; static int cfq_group_idle = HZ / 125; static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* * offset from end of service tree */ #define CFQ_IDLE_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate */ #define CFQ_MIN_TT (2) #define CFQ_SLICE_SCALE (5) #define CFQ_HW_QUEUE_MIN (5) #define CFQ_SERVICE_SHIFT 12 #define CFQQ_SEEK_THR (sector_t)(8 * 100) #define CFQQ_CLOSE_THR (sector_t)(8 * 1024) #define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) #define RQ_CIC(rq) icq_to_cic((rq)->elv.icq) #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0]) #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1]) static struct kmem_cache *cfq_pool; #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) #define sample_valid(samples) ((samples) > 80) #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) struct cfq_ttime { unsigned long last_end_request; unsigned long ttime_total; unsigned long ttime_samples; unsigned long ttime_mean; }; /* * Most of our rbtree usage is for sorting with min extraction, so * if we cache the leftmost node we don't have to walk down the tree * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should * move this into the elevator for the rq sorting as well. */ struct cfq_rb_root { struct rb_root rb; struct rb_node *left; unsigned count; unsigned total_weight; u64 min_vdisktime; struct cfq_ttime ttime; }; #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ .ttime = {.last_end_request = jiffies,},} /* * Per process-grouping structure */ struct cfq_queue { /* reference count */ int ref; /* various state flags, see below */ unsigned int flags; /* parent cfq_data */ struct cfq_data *cfqd; /* service_tree member */ struct rb_node rb_node; /* service_tree key */ unsigned long rb_key; /* prio tree member */ struct rb_node p_node; /* prio tree root we belong to, if any */ struct rb_root *p_root; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ struct request *next_rq; /* requests queued in sort_list */ int queued[2]; /* currently allocated requests */ int allocated[2]; /* fifo list of requests in sort_list */ struct list_head fifo; /* time when queue got scheduled in to dispatch first request. */ unsigned long dispatch_start; unsigned int allocated_slice; unsigned int slice_dispatch; /* time when first request from queue completed and slice started. */ unsigned long slice_start; unsigned long slice_end; long slice_resid; /* pending priority requests */ int prio_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; /* io prio of this group */ unsigned short ioprio, org_ioprio;