				@@ -0,0 +1,180 @@ 
+/*
+ * Functions to sequence FLUSH and FUA writes.
+ *
+ * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
+ * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * REQ_{FLUSH|FUA} requests are decomposed into sequences consisting of
+ * three optional steps - PREFLUSH, DATA and POSTFLUSH - according to the
+ * request properties and hardware capability.
+ *
+ * If a request doesn't have data, only REQ_FLUSH makes sense, which
+ * indicates a simple flush request.  If there is data, REQ_FLUSH indicates
+ * that the device cache should be flushed before the data is executed, and
+ * REQ_FUA means that the data must be on non-volatile media on request
+ * completion.
+ *
+ * If the device doesn't have a writeback cache, FLUSH and FUA don't make
+ * any difference.  The requests are either completed immediately if
+ * there's no data or executed as normal requests otherwise.
+ *
+ * If the device has a writeback cache and supports FUA, REQ_FLUSH is
+ * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
+ *
+ * If the device has a writeback cache and doesn't support FUA, REQ_FLUSH
+ * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ *
+ * The actual execution of a flush is double buffered.  Whenever a request
+ * needs to execute PRE or POSTFLUSH, it is queued at
+ * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * flush is issued and the pending_idx is toggled.  When the flush
+ * completes, all requests which were pending proceed to the next step.
+ * This allows arbitrary merging of different types of FLUSH/FUA requests.
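+ *
+ * Concretely, while the flush issued for the requests gathered on
+ * flush_queue[0] is in flight, newly arriving PRE/POSTFLUSH steps queue
+ * on flush_queue[1]; two lists suffice because only one flush is ever in
+ * progress at a time (condition C1 below).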
+ *
+ * Currently, the following conditions are used to determine when to issue
+ * a flush.
+ *
+ * C1. At any given time, only one flush shall be in progress.  This makes
+ *     double buffering sufficient.
+ *
+ * C2. Flush is deferred if any request is executing DATA of its sequence.
+ *     This avoids issuing separate POSTFLUSHes for requests which shared
+ *     PREFLUSH.
+ *
+ * C3. The second condition is ignored if there is a request which has
+ *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
+ *     starvation in the unlikely case where there is a continuous stream
+ *     of FUA (without FLUSH) requests.
+ *
+ * For devices which support FUA, it isn't clear whether C2 (and thus C3)
+ * is beneficial.
+ *
+ * Note that a sequenced FLUSH/FUA request with DATA is completed twice:
+ * once while executing DATA and again after the whole sequence is
+ * complete.  The first completion updates the contained bio but doesn't
+ * finish it so that the bio submitter is notified only after the whole
+ * sequence is complete.  This is implemented by testing REQ_FLUSH_SEQ in
+ * req_bio_endio().
+ *
+ * The above peculiarity requires that each FLUSH/FUA request has only one
+ * bio attached to it, which is guaranteed as they aren't allowed to be
+ * merged in the usual way.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/gfp.h>
+
+#include "blk.h"
+
+/* FLUSH/FUA sequences */
+enum {
+	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
+	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
+	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
+	REQ_FSEQ_DONE		= (1 << 3),
+
+	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
+				  REQ_FSEQ_POSTFLUSH,
+
+	/*
+	 * If a flush has been pending longer than the following timeout,
+	 * it's issued even if flush_data requests are still in flight.
+	 */
+	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
+};
+
+static bool blk_kick_flush(struct request_queue *q);
+
+static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
+{
+	unsigned int policy = 0;
+
+	if (blk_rq_sectors(rq))
+		policy |= REQ_FSEQ_DATA;
+
+	if (fflags & REQ_FLUSH) {
+		if (rq->cmd_flags & REQ_FLUSH)
+			policy |= REQ_FSEQ_PREFLUSH;
+		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
+			policy |= REQ_FSEQ_POSTFLUSH;
+	}
+	return policy;
+}
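+
+/*
+ * Example policy outcomes (illustrative):
+ *
+ *	queue fflags	rq->cmd_flags		resulting policy
+ *	FLUSH|FUA	WRITE|FLUSH|FUA		PREFLUSH|DATA (FUA kept on DATA)
+ *	FLUSH		WRITE|FLUSH|FUA		PREFLUSH|DATA|POSTFLUSH
+ *	FLUSH		FLUSH (no data)		PREFLUSH
+ */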
+
+static unsigned int blk_flush_cur_seq(struct request *rq)
+{
+	return 1 << ffz(rq->flush.seq);
+}
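+
+/*
+ * Completed steps are recorded as set bits in rq->flush.seq, so the
+ * lowest clear bit located by ffz() is the next step to execute.  For
+ * example, once PREFLUSH is done for a request whose sequence includes
+ * DATA, ffz(REQ_FSEQ_PREFLUSH) == 1 and the current step becomes
+ * REQ_FSEQ_DATA (1 << 1).
+ */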
+
+static void blk_flush_restore_request(struct request *rq)
+{
+	/*
+	 * After flush data completion, @rq->bio is %NULL but we need to
+	 * complete the bio again.  @rq->biotail is guaranteed to equal the
+	 * original @rq->bio.  Restore it.
+	 */
+	rq->bio = rq->biotail;
+
+	/* make @rq a normal request */
+	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
+	rq->end_io = rq->flush.saved_end_io;
+}
+
+/**
+ * blk_flush_complete_seq - complete flush sequence
+ * @rq: FLUSH/FUA request being sequenced
+ * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
+ * @error: whether an error occurred
+ *
+ * @rq just completed @seq part of its flush sequence; record the
+ * completion and trigger the next step.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if requests were added to the dispatch queue, %false otherwise.
+ */
+static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
+				   int error)
+{
+	struct request_queue *q = rq->q;
+	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	bool queued = false;
+
+	BUG_ON(rq->flush.seq & seq);
+	rq->flush.seq |= seq;
+
+	if (likely(!error))
+		seq = blk_flush_cur_seq(rq);
+	else
+		seq = REQ_FSEQ_DONE;
+
+	switch (seq) {
+	case REQ_FSEQ_PREFLUSH:
+	case REQ_FSEQ_POSTFLUSH:
+		/* queue for flush */
+		if (list_empty(pending))
+			q->flush_pending_since = jiffies;
+		list_move_tail(&rq->flush.list, pending);
+		break;
+
+	case REQ_FSEQ_DATA:
+		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+		list_add(&rq->queuelist, &q->queue_head);
+		queued = true;
+		break;
+
+	case REQ_FSEQ_DONE:
+		/*
+		 * @rq was previously adjusted by blk_flush_issue() for
+		 * flush sequencing and may already have gone through the
+		 * flush data request completion path.  Restore @rq for
+		 * normal completion and end it.
+		 */