Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * The Kyber I/O scheduler. Controls latency by throttling queue depths using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * scalable techniques.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  * Copyright (C) 2017 Facebook
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/blk-mq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/elevator.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/sbitmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include "blk.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include "blk-mq.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include "blk-mq-debugfs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include "blk-mq-sched.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include "blk-mq-tag.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <trace/events/kyber.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  * Scheduling domains: the device is divided into multiple domains based on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27)  * request type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) 	KYBER_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) 	KYBER_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) 	KYBER_DISCARD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) 	KYBER_OTHER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) 	KYBER_NUM_DOMAINS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) static const char *kyber_domain_names[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) 	[KYBER_READ] = "READ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) 	[KYBER_WRITE] = "WRITE",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) 	[KYBER_DISCARD] = "DISCARD",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) 	[KYBER_OTHER] = "OTHER",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) 	 * In order to prevent starvation of synchronous requests by a flood of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) 	 * asynchronous requests, we reserve 25% of requests for synchronous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) 	 * operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) 	KYBER_ASYNC_PERCENT = 75,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) };
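
/*
 * How this percentage is applied (see kyber_queue_data_alloc() below):
 * async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100, where shift is
 * the sched_tags sbitmap per-word shift. E.g., assuming a shift of 6
 * (64 bits per word), asynchronous requests may consume at most 48 of the
 * 64 tags in each word, leaving 16 effectively reserved for synchronous
 * requests.
 */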
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  * Maximum device-wide depth for each scheduling domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56)  * Even for fast devices with lots of tags like NVMe, you can saturate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57)  * device with only a fraction of the maximum possible queue depth. So, we cap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58)  * these to a reasonable value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) static const unsigned int kyber_depth[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) 	[KYBER_READ] = 256,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) 	[KYBER_WRITE] = 128,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) 	[KYBER_DISCARD] = 64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) 	[KYBER_OTHER] = 16,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68)  * Default latency targets for each scheduling domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) static const u64 kyber_latency_targets[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) 	[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 	[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 	[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77)  * Batch size (number of requests we'll dispatch in a row) for each scheduling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78)  * domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) static const unsigned int kyber_batch_size[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 	[KYBER_READ] = 16,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	[KYBER_WRITE] = 8,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 	[KYBER_DISCARD] = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	[KYBER_OTHER] = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88)  * Request latencies are recorded in a histogram with buckets defined relative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89)  * to the target latency:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91)  * <= 1/4 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92)  * <= 1/2 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93)  * <= 3/4 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94)  * <= target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95)  * <= 1 1/4 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96)  * <= 1 1/2 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97)  * <= 1 3/4 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98)  * > 1 3/4 * target latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	 * The width of the latency histogram buckets is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 	 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 	KYBER_LATENCY_SHIFT = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	 * thus, "good".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) };
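
/*
 * Concretely, with KYBER_LATENCY_SHIFT = 2 and the default 2 ms read target,
 * the bucket width is 0.5 ms: the "good" buckets 0-3 cover latencies up to
 * 0.5, 1.0, 1.5, and 2.0 ms, the "bad" buckets 4-6 cover latencies up to
 * 2.5, 3.0, and 3.5 ms, and bucket 7 catches anything slower than that.
 */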
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116)  * We measure both the total latency and the I/O latency (i.e., latency after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117)  * submitting to the device).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	KYBER_TOTAL_LATENCY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	KYBER_IO_LATENCY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) static const char *kyber_latency_type_names[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	[KYBER_TOTAL_LATENCY] = "total",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	[KYBER_IO_LATENCY] = "I/O",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130)  * Per-cpu latency histograms: total latency and I/O latency for each scheduling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131)  * domain except for KYBER_OTHER.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) struct kyber_cpu_latency {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138)  * There is a one-to-one mapping between ctx & hctx and between kcq & khd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139)  * we use request->mq_ctx->index_hw to index the kcq in khd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) struct kyber_ctx_queue {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	 * Used to ensure that operations on rq_list and kcq_map are atomic. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	 * protects the requests on rq_list during merging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 	struct list_head rq_list[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) } ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) struct kyber_queue_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 	 * Each scheduling domain has a limited number of in-flight requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	 * device-wide, enforced by these tokens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	 * Async request percentage, converted to per-word depth for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	 * sbitmap_get_shallow().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	unsigned int async_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	struct kyber_cpu_latency __percpu *cpu_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	/* Timer for stats aggregation and adjusting domain tokens. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	struct timer_list timer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	unsigned long latency_timeout[KYBER_OTHER];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	int domain_p99[KYBER_OTHER];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 	/* Target latencies in nanoseconds. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	u64 latency_targets[KYBER_OTHER];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) struct kyber_hctx_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	struct list_head rqs[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	unsigned int cur_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	unsigned int batching;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	struct kyber_ctx_queue *kcqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	atomic_t wait_index[KYBER_NUM_DOMAINS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 			     void *key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) static unsigned int kyber_sched_domain(unsigned int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	switch (op & REQ_OP_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 	case REQ_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 		return KYBER_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 		return KYBER_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 	case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 		return KYBER_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 		return KYBER_OTHER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) static void flush_latency_buckets(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 				  struct kyber_cpu_latency *cpu_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 				  unsigned int sched_domain, unsigned int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 	atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	unsigned int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 		buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222)  * Calculate the histogram bucket with the given percentile rank, or -1 if there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223)  * aren't enough samples yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) static int calculate_percentile(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 				unsigned int sched_domain, unsigned int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 				unsigned int percentile)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 	unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	unsigned int bucket, samples = 0, percentile_samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 		samples += buckets[bucket];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	if (!samples)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	 * We do the calculation once we have 500 samples or one second passes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	 * since the first sample was recorded, whichever comes first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	if (!kqd->latency_timeout[sched_domain])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 		kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	if (samples < 500 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	    time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	kqd->latency_timeout[sched_domain] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 	percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 		if (buckets[bucket] >= percentile_samples)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 		percentile_samples -= buckets[bucket];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 			    kyber_latency_type_names[type], percentile,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 			    bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 	return bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) }
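
/*
 * A sketch of the walk above with made-up numbers: suppose 600 samples are
 * spread over the buckets as {0, 100, 250, 150, 60, 40, 0, 0} and we want
 * the 90th percentile. percentile_samples = DIV_ROUND_UP(600 * 90, 100) = 540;
 * the loop subtracts 0, 100, 250, and 150 (leaving 40) and then stops at
 * bucket 4 because 60 >= 40, so the p90 falls in bucket 4, i.e. just above
 * the target latency.
 */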
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) static void kyber_resize_domain(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 				unsigned int sched_domain, unsigned int depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 	if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 		sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 				   depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) static void kyber_timer_fn(struct timer_list *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	unsigned int sched_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	bool bad = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	/* Sum all of the per-cpu latency histograms. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 		struct kyber_cpu_latency *cpu_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 		cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 		for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 			flush_latency_buckets(kqd, cpu_latency, sched_domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 					      KYBER_TOTAL_LATENCY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 			flush_latency_buckets(kqd, cpu_latency, sched_domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 					      KYBER_IO_LATENCY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	 * Check if any domains have a high I/O latency, which might indicate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	 * congestion in the device. Note that we use the p90; we don't want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	 * be too sensitive to outliers here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 		int p90;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 		p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 					   90);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 		if (p90 >= KYBER_GOOD_BUCKETS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 			bad = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 	 * Adjust the scheduling domain depths. If we determined that there was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	 * congestion, we throttle all domains with good latencies. Either way,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	 * we ease up on throttling domains with bad latencies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 		unsigned int orig_depth, depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 		int p99;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 		p99 = calculate_percentile(kqd, sched_domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 					   KYBER_TOTAL_LATENCY, 99);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 		 * This is kind of subtle: different domains will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		 * necessarily have enough samples to calculate the latency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 		 * percentiles during the same window, so we have to remember
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 		 * the p99 for the next time we observe congestion; once we do,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 		 * we don't want to throttle again until we get more data, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 		 * reset it to -1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 		if (bad) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 			if (p99 < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 				p99 = kqd->domain_p99[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 			kqd->domain_p99[sched_domain] = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 		} else if (p99 >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 			kqd->domain_p99[sched_domain] = p99;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 		if (p99 < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 		 * If this domain has bad latency, throttle less. Otherwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		 * throttle more iff we determined that there is congestion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 		 * The new depth is scaled linearly with the p99 latency vs the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 		 * latency target. E.g., if the p99 is 3/4 of the target, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 		 * we throttle down to 3/4 of the current depth, and if the p99
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 		 * is 2x the target, then we double the depth.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 		 */
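		/*
		 * A concrete reading of the scaling below, with
		 * KYBER_LATENCY_SHIFT = 2: a p99 in bucket 1 (<= 1/2 of the
		 * target) resizes to orig_depth * 2 / 4, halving the depth,
		 * while a p99 in bucket 5 (<= 1 1/2 of the target) resizes to
		 * orig_depth * 6 / 4, a 50% increase.
		 */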
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		if (bad || p99 >= KYBER_GOOD_BUCKETS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 			orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 			depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 			kyber_resize_domain(kqd, sched_domain, depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) static unsigned int kyber_sched_tags_shift(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	 * All of the hardware queues have the same depth, so we can just grab
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	 * the shift of the first one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	struct kyber_queue_data *kqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 	unsigned int shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	if (!kqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	kqd->q = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 					    GFP_KERNEL | __GFP_ZERO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	if (!kqd->cpu_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 		goto err_kqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	timer_setup(&kqd->timer, kyber_timer_fn, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 		WARN_ON(!kyber_depth[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 		WARN_ON(!kyber_batch_size[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 		ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 					      kyber_depth[i], -1, false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 					      GFP_KERNEL, q->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 			while (--i >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 				sbitmap_queue_free(&kqd->domain_tokens[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 			goto err_buckets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 	for (i = 0; i < KYBER_OTHER; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 		kqd->domain_p99[i] = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		kqd->latency_targets[i] = kyber_latency_targets[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 	shift = kyber_sched_tags_shift(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	return kqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) err_buckets:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 	free_percpu(kqd->cpu_latency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) err_kqd:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	kfree(kqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	struct kyber_queue_data *kqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 	struct elevator_queue *eq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	eq = elevator_alloc(q, e);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	if (!eq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	kqd = kyber_queue_data_alloc(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	if (IS_ERR(kqd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 		kobject_put(&eq->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 		return PTR_ERR(kqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	blk_stat_enable_accounting(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	eq->elevator_data = kqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	q->elevator = eq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) static void kyber_exit_sched(struct elevator_queue *e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	struct kyber_queue_data *kqd = e->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	del_timer_sync(&kqd->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 		sbitmap_queue_free(&kqd->domain_tokens[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	free_percpu(kqd->cpu_latency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	kfree(kqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 	unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 	spin_lock_init(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 		INIT_LIST_HEAD(&kcq->rq_list[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 	struct kyber_hctx_data *khd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	if (!khd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 				       sizeof(struct kyber_ctx_queue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 				       GFP_KERNEL, hctx->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	if (!khd->kcqs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 		goto err_khd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	for (i = 0; i < hctx->nr_ctx; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 		kyber_ctx_queue_init(&khd->kcqs[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 			while (--i >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 				sbitmap_free(&khd->kcq_map[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 			goto err_kcqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	spin_lock_init(&khd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		INIT_LIST_HEAD(&khd->rqs[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 		khd->domain_wait[i].sbq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 		init_waitqueue_func_entry(&khd->domain_wait[i].wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 					  kyber_domain_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 		khd->domain_wait[i].wait.private = hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 		INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		atomic_set(&khd->wait_index[i], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	khd->cur_domain = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	khd->batching = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 	hctx->sched_data = khd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 					kqd->async_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) err_kcqs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	kfree(khd->kcqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) err_khd:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	kfree(khd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 		sbitmap_free(&khd->kcq_map[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 	kfree(khd->kcqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	kfree(hctx->sched_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) static int rq_get_domain_token(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 	return (long)rq->elv.priv[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) static void rq_set_domain_token(struct request *rq, int token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	rq->elv.priv[0] = (void *)(long)token;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) static void rq_clear_domain_token(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 				  struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	unsigned int sched_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	nr = rq_get_domain_token(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	if (nr != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 		sched_domain = kyber_sched_domain(rq->cmd_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 				    rq->mq_ctx->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	 * We use the scheduler tags as per-hardware queue queueing tokens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 	 * Async requests can be limited at this stage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	if (!op_is_sync(op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 		data->shallow_depth = kqd->async_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 		unsigned int nr_segs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	struct list_head *rq_list = &kcq->rq_list[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	bool merged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	spin_lock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 	merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	spin_unlock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	return merged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) static void kyber_prepare_request(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	rq_set_domain_token(rq, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 				  struct list_head *rq_list, bool at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	struct request *rq, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 		struct list_head *head = &kcq->rq_list[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 		spin_lock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 		if (at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 			list_move(&rq->queuelist, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 			list_move_tail(&rq->queuelist, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 		sbitmap_set_bit(&khd->kcq_map[sched_domain],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 				rq->mq_ctx->index_hw[hctx->type]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 		blk_mq_sched_request_inserted(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 		spin_unlock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) static void kyber_finish_request(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	rq_clear_domain_token(kqd, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 			       unsigned int sched_domain, unsigned int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 			       u64 target, u64 latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	unsigned int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	u64 divisor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	if (latency > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 		divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 		bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 			       KYBER_LATENCY_BUCKETS - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 		bucket = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) }
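
/*
 * For instance, with the default 2 ms read target the divisor is 500000 ns,
 * so a 1.6 ms read lands in bucket min((1600000 - 1) / 500000, 7) = 3 and a
 * 4 ms read lands in bucket 7, the overflow bucket.
 */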
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) static void kyber_completed_request(struct request *rq, u64 now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	struct kyber_cpu_latency *cpu_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	unsigned int sched_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	u64 target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	sched_domain = kyber_sched_domain(rq->cmd_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	if (sched_domain == KYBER_OTHER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	cpu_latency = get_cpu_ptr(kqd->cpu_latency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	target = kqd->latency_targets[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 			   target, now - rq->start_time_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 	add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 			   now - rq->io_start_time_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	put_cpu_ptr(kqd->cpu_latency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 
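	/* Make sure the stats timer fires within ~100 ms (HZ / 10 jiffies) of this completion. */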
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	timer_reduce(&kqd->timer, jiffies + HZ / 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) struct flush_kcq_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	struct kyber_hctx_data *khd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	unsigned int sched_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	struct list_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	struct flush_kcq_data *flush_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	spin_lock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 			      flush_data->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	sbitmap_clear_bit(sb, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	spin_unlock(&kcq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 				  unsigned int sched_domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 				  struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	struct flush_kcq_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 		.khd = khd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		.sched_domain = sched_domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		.list = list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 			     flush_busy_kcq, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 			     void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	sbitmap_del_wait_queue(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 	blk_mq_run_hw_queue(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) static int kyber_get_domain_token(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 				  struct kyber_hctx_data *khd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 				  struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	unsigned int sched_domain = khd->cur_domain;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	struct sbq_wait *wait = &khd->domain_wait[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	struct sbq_wait_state *ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	nr = __sbitmap_queue_get(domain_tokens);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	 * If we failed to get a domain token, make sure the hardware queue is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	 * run when one becomes available. Note that this is serialized on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	 * khd->lock, but we still need to be careful about the waker.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 	if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 		ws = sbq_wait_ptr(domain_tokens,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 				  &khd->wait_index[sched_domain]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 		khd->domain_ws[sched_domain] = ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		sbitmap_add_wait_queue(domain_tokens, ws, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 		 * Try again in case a token was freed before we got on the wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 		 * queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 		nr = __sbitmap_queue_get(domain_tokens);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	 * If we got a token while we were on the wait queue, remove ourselves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	 * from the wait queue to ensure that all wake ups make forward
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	 * progress. It's possible that the waker already deleted the entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	 * between the !list_empty_careful() check and us grabbing the lock, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	 * list_del_init() is okay with that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 		ws = khd->domain_ws[sched_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 		spin_lock_irq(&ws->wait.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		sbitmap_del_wait_queue(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 		spin_unlock_irq(&ws->wait.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) static struct request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 			  struct kyber_hctx_data *khd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 			  struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	struct list_head *rqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	rqs = &khd->rqs[khd->cur_domain];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	 * If we already have a flushed request, then we just need to get a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	 * token for it. Otherwise, if there are pending requests in the kcqs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	 * flush the kcqs, but only if we can get a token. If not, we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	 * leave the requests in the kcqs so that they can be merged. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	 * khd->lock serializes the flushes, so if we observed any bit set in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	 * the kcq_map, we will always get a request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	rq = list_first_entry_or_null(rqs, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	if (rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		nr = kyber_get_domain_token(kqd, khd, hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		if (nr >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 			khd->batching++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 			rq_set_domain_token(rq, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 			list_del_init(&rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 			return rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 			trace_kyber_throttled(kqd->q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 					      kyber_domain_names[khd->cur_domain]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 		nr = kyber_get_domain_token(kqd, khd, hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		if (nr >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 			rq = list_first_entry(rqs, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 			khd->batching++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 			rq_set_domain_token(rq, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 			list_del_init(&rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 			return rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 			trace_kyber_throttled(kqd->q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 					      kyber_domain_names[khd->cur_domain]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	/* There were either no pending requests or no tokens. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) }
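/*
 * The kyber_throttled tracepoint used above is the simplest way to observe
 * token exhaustion from userspace. Assuming tracefs is mounted at
 * /sys/kernel/tracing (or /sys/kernel/debug/tracing), something like:
 *
 *   # echo 1 > /sys/kernel/tracing/events/kyber/kyber_throttled/enable
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * should print one event per throttled dispatch attempt, tagged with the
 * domain name passed in here. The exact event path follows the event naming
 * in trace/events/kyber.h.
 */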
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	spin_lock(&khd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	 * First, if we are still entitled to batch, try to dispatch a request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	 * from the batch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		if (rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	 * Either,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	 * 1. We were no longer entitled to a batch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	 * 2. The domain we were batching didn't have any requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	 * 3. The domain we were batching was out of tokens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	 * Start another batch. Note that this wraps back around to the original
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	 * domain if no other domains have requests or tokens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	khd->batching = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 			khd->cur_domain = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 			khd->cur_domain++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		if (rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	rq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	spin_unlock(&khd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	return rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) }
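/*
 * Worked example of the round-robin above: if cur_domain was the write
 * domain and it ran out of tokens, the loop tries the discard domain, then
 * the "other" domain, then reads, and finally writes again (following the
 * order of the domain enum) before giving up for this dispatch call.
 * khd->batching is reset first, so whichever domain yields a request starts
 * a fresh batch.
 */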
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		if (!list_empty_careful(&khd->rqs[i]) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		    sbitmap_any_bit_set(&khd->kcq_map[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) }
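/*
 * Note that kyber_has_work() deliberately does not take khd->lock:
 * list_empty_careful() and sbitmap_any_bit_set() are safe to call
 * locklessly, so this is a best-effort check. A racing insert that we miss
 * here is expected to trigger its own queue run.
 */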
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) #define KYBER_LAT_SHOW_STORE(domain, name)				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) static ssize_t kyber_##name##_lat_show(struct elevator_queue *e,	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 				       char *page)			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	struct kyber_queue_data *kqd = e->elevator_data;		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	return sprintf(page, "%llu\n", kqd->latency_targets[domain]);	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) }									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) static ssize_t kyber_##name##_lat_store(struct elevator_queue *e,	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 					const char *page, size_t count)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	struct kyber_queue_data *kqd = e->elevator_data;		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	unsigned long long nsec;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	int ret;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	ret = kstrtoull(page, 10, &nsec);				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	if (ret)							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		return ret;						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	kqd->latency_targets[domain] = nsec;				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	return count;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) KYBER_LAT_SHOW_STORE(KYBER_READ, read);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) #undef KYBER_LAT_SHOW_STORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) static struct elv_fs_entry kyber_sched_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	KYBER_LAT_ATTR(read),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	KYBER_LAT_ATTR(write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	__ATTR_NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) #undef KYBER_LAT_ATTR
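/*
 * The two attributes generated above are exposed through the per-device
 * iosched directory in sysfs (paths assuming the usual sysfs layout):
 *
 *   # cat /sys/block/<dev>/queue/iosched/read_lat_nsec
 *   # echo 20000000 > /sys/block/<dev>/queue/iosched/write_lat_nsec
 *
 * Values are plain decimal nanoseconds parsed with kstrtoull(); there is no
 * range checking beyond what kstrtoull() itself rejects.
 */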
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) #ifdef CONFIG_BLK_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) #define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name)			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) static int kyber_##name##_tokens_show(void *data, struct seq_file *m)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	struct request_queue *q = data;					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	struct kyber_queue_data *kqd = q->elevator->elevator_data;	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	sbitmap_queue_show(&kqd->domain_tokens[domain], m);		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	return 0;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) }									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	__acquires(&khd->lock)						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	struct blk_mq_hw_ctx *hctx = m->private;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	struct kyber_hctx_data *khd = hctx->sched_data;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	spin_lock(&khd->lock);						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	return seq_list_start(&khd->rqs[domain], *pos);			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) }									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) static void *kyber_##name##_rqs_next(struct seq_file *m, void *v,	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 				     loff_t *pos)			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	struct blk_mq_hw_ctx *hctx = m->private;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	struct kyber_hctx_data *khd = hctx->sched_data;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	return seq_list_next(v, &khd->rqs[domain], pos);		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) }									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) static void kyber_##name##_rqs_stop(struct seq_file *m, void *v)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	__releases(&khd->lock)						\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	struct blk_mq_hw_ctx *hctx = m->private;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	struct kyber_hctx_data *khd = hctx->sched_data;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	spin_unlock(&khd->lock);					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) }									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) static const struct seq_operations kyber_##name##_rqs_seq_ops = {	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	.start	= kyber_##name##_rqs_start,				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	.next	= kyber_##name##_rqs_next,				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	.stop	= kyber_##name##_rqs_stop,				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	.show	= blk_mq_debugfs_rq_show,				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) };									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) {									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	struct blk_mq_hw_ctx *hctx = data;				\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	struct kyber_hctx_data *khd = hctx->sched_data;			\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	wait_queue_entry_t *wait = &khd->domain_wait[domain].wait;	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 									\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	return 0;							\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) #undef KYBER_DEBUGFS_DOMAIN_ATTRS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) static int kyber_async_depth_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	struct request_queue *q = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	struct kyber_queue_data *kqd = q->elevator->elevator_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	seq_printf(m, "%u\n", kqd->async_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) static int kyber_cur_domain_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	struct blk_mq_hw_ctx *hctx = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) static int kyber_batching_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	struct blk_mq_hw_ctx *hctx = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	struct kyber_hctx_data *khd = hctx->sched_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	seq_printf(m, "%u\n", khd->batching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) #define KYBER_QUEUE_DOMAIN_ATTRS(name)	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	{#name "_tokens", 0400, kyber_##name##_tokens_show}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	KYBER_QUEUE_DOMAIN_ATTRS(read),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	KYBER_QUEUE_DOMAIN_ATTRS(write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	KYBER_QUEUE_DOMAIN_ATTRS(discard),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	KYBER_QUEUE_DOMAIN_ATTRS(other),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	{"async_depth", 0400, kyber_async_depth_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	{},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) #undef KYBER_QUEUE_DOMAIN_ATTRS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) #define KYBER_HCTX_DOMAIN_ATTRS(name)					\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops},	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	{#name "_waiting", 0400, kyber_##name##_waiting_show}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	KYBER_HCTX_DOMAIN_ATTRS(read),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	KYBER_HCTX_DOMAIN_ATTRS(write),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	KYBER_HCTX_DOMAIN_ATTRS(discard),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	KYBER_HCTX_DOMAIN_ATTRS(other),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	{"cur_domain", 0400, kyber_cur_domain_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	{"batching", 0400, kyber_batching_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	{},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) #undef KYBER_HCTX_DOMAIN_ATTRS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) #endif
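/*
 * When CONFIG_BLK_DEBUG_FS is enabled, the attribute tables above are
 * registered through the blk-mq debugfs hooks in kyber_sched below. Assuming
 * debugfs is mounted at /sys/kernel/debug, the queue-level files (e.g.
 * read_tokens, async_depth) typically appear under
 *
 *   /sys/kernel/debug/block/<dev>/sched/
 *
 * and the per-hctx files (e.g. read_rqs, read_waiting, cur_domain, batching)
 * under
 *
 *   /sys/kernel/debug/block/<dev>/hctx<N>/sched/
 *
 * The exact directory names come from blk-mq-debugfs.c, not from this file.
 */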
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) static struct elevator_type kyber_sched = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	.ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 		.init_sched = kyber_init_sched,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		.exit_sched = kyber_exit_sched,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		.init_hctx = kyber_init_hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 		.exit_hctx = kyber_exit_hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		.limit_depth = kyber_limit_depth,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		.bio_merge = kyber_bio_merge,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		.prepare_request = kyber_prepare_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 		.insert_requests = kyber_insert_requests,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		.finish_request = kyber_finish_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		.requeue_request = kyber_finish_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		.completed_request = kyber_completed_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		.dispatch_request = kyber_dispatch_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		.has_work = kyber_has_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) #ifdef CONFIG_BLK_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	.elevator_attrs = kyber_sched_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	.elevator_name = "kyber",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	.elevator_features = ELEVATOR_F_MQ_AWARE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	.elevator_owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) };
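/*
 * The scheduler registers under the name "kyber" and can be selected per
 * device at runtime in the usual way:
 *
 *   # echo kyber > /sys/block/<dev>/queue/scheduler
 *
 * Note that .requeue_request intentionally points at kyber_finish_request,
 * so a requeued request drops its domain token the same way a completed one
 * does and picks up a fresh token when it is dispatched again.
 */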
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) static int __init kyber_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	return elv_register(&kyber_sched);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) static void __exit kyber_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	elv_unregister(&kyber_sched);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) module_init(kyber_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) module_exit(kyber_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) MODULE_AUTHOR("Omar Sandoval");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) MODULE_LICENSE("GPL");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) MODULE_DESCRIPTION("Kyber I/O scheduler");
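/*
 * Kyber is normally built via CONFIG_MQ_IOSCHED_KYBER. When that option is
 * =m, this file becomes a loadable module (kyber_iosched, going by the usual
 * block/Makefile naming) and elv_register()/elv_unregister() above run at
 * load/unload time; when built in, kyber_init() runs during boot like any
 * other module_init() call.
 */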