nvme: Remove SCSI translations
[linux] / drivers / nvme / host / core.c
index a609264..8227431 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
 #include <linux/pm_qos.h>
-#include <scsi/sg.h>
 #include <asm/unaligned.h>
 
 #include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 EXPORT_SYMBOL_GPL(nvme_io_timeout);
 
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
@@ -56,7 +55,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
                 "max power saving latency for new devices; use PM QOS to change per device");
@@ -65,34 +64,53 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+               return -EBUSY;
+       if (!queue_work(nvme_wq, &ctrl->reset_work))
+               return -EBUSY;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+       int ret;
+
+       ret = nvme_reset_ctrl(ctrl);
+       if (!ret)
+               flush_work(&ctrl->reset_work);
+       return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
 {
        switch (nvme_req(req)->status & 0x7ff) {
        case NVME_SC_SUCCESS:
-               return 0;
+               return BLK_STS_OK;
        case NVME_SC_CAP_EXCEEDED:
-               return -ENOSPC;
-       default:
-               return -EIO;
-
-       /*
-        * XXX: these errors are a nasty side-band protocol to
-        * drivers/md/dm-mpath.c:noretry_error() that aren't documented
-        * anywhere..
-        */
-       case NVME_SC_CMD_SEQ_ERROR:
-               return -EILSEQ;
+               return BLK_STS_NOSPC;
        case NVME_SC_ONCS_NOT_SUPPORTED:
-               return -EOPNOTSUPP;
+               return BLK_STS_NOTSUPP;
        case NVME_SC_WRITE_FAULT:
        case NVME_SC_READ_ERROR:
        case NVME_SC_UNWRITTEN_BLOCK:
-               return -ENODATA;
+               return BLK_STS_MEDIUM;
+       default:
+               return BLK_STS_IOERR;
        }
 }
 
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                switch (old_state) {
                case NVME_CTRL_NEW:
                case NVME_CTRL_LIVE:
-               case NVME_CTRL_RECONNECTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+
+       c.directive.opcode = nvme_admin_directive_send;
+       c.directive.nsid = cpu_to_le32(0xffffffff);
+       c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+       c.directive.dtype = NVME_DIR_IDENTIFY;
+       c.directive.tdtype = NVME_DIR_STREAMS;
+       c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+       return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+       return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+                                 struct streams_directive_params *s, u32 nsid)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+       memset(s, 0, sizeof(*s));
+
+       c.directive.opcode = nvme_admin_directive_recv;
+       c.directive.nsid = cpu_to_le32(nsid);
+       c.directive.numd = sizeof(*s);
+       c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+       c.directive.dtype = NVME_DIR_STREAMS;
+
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+       struct streams_directive_params s;
+       int ret;
+
+       if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+               return 0;
+       if (!streams)
+               return 0;
+
+       ret = nvme_enable_streams(ctrl);
+       if (ret)
+               return ret;
+
+       ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+       if (ret)
+               return ret;
+
+       ctrl->nssa = le16_to_cpu(s.nssa);
+       if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+               dev_info(ctrl->device, "too few streams (%u) available\n",
+                                       ctrl->nssa);
+               nvme_disable_streams(ctrl);
+               return 0;
+       }
+
+       ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+       dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+       return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+                                    struct request *req, u16 *control,
+                                    u32 *dsmgmt)
+{
+       enum rw_hint streamid = req->write_hint;
+
+       if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+               streamid = 0;
+       else {
+               streamid--;
+               if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+                       return;
+
+               *control |= NVME_RW_DTYPE_STREAMS;
+               *dsmgmt |= streamid << 16;
+       }
+
+       if (streamid < ARRAY_SIZE(req->q->write_hints))
+               req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
 static inline void nvme_setup_flush(struct nvme_ns *ns,
                struct nvme_command *cmnd)
 {
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
        cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmnd)
 {
        unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
        range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
        if (!range)
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
 
        __rq_for_each_bio(bio, req) {
                u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
        if (WARN_ON_ONCE(n != segments)) {
                kfree(range);
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
        memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
        req->special_vec.bv_len = sizeof(*range) * segments;
        req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-               struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+               struct request *req, struct nvme_command *cmnd)
 {
+       struct nvme_ctrl *ctrl = ns->ctrl;
        u16 control = 0;
        u32 dsmgmt = 0;
 
+       /*
+        * If formated with metadata, require the block layer provide a buffer
+        * unless this namespace is formated such that the metadata can be
+        * stripped/generated by the controller with PRACT=1.
+        */
+       if (ns && ns->ms &&
+           (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+           !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+               return BLK_STS_NOTSUPP;
+
        if (req->cmd_flags & REQ_FUA)
                control |= NVME_RW_FUA;
        if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
        cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
        cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
+       if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+               nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
        if (ns->ms) {
                switch (ns->pi_type) {
                case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 
        cmnd->rw.control = cpu_to_le16(control);
        cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+       return 0;
 }
 
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmd)
 {
-       int ret = BLK_MQ_RQ_QUEUE_OK;
+       blk_status_t ret = BLK_STS_OK;
 
        if (!(req->rq_flags & RQF_DONTPREP)) {
                nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                break;
        case REQ_OP_READ:
        case REQ_OP_WRITE:
-               nvme_setup_rw(ns, req, cmd);
+               ret = nvme_setup_rw(ns, req, cmd);
                break;
        default:
                WARN_ON_ONCE(1);
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
        cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                        result, timeout);
 }
 
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
        struct nvme_ctrl *ctrl = rq->end_io_data;
 
        blk_mq_free_request(rq);
 
-       if (error) {
+       if (status) {
                dev_err(ctrl->device,
-                       "failed nvme_keep_alive_end_io error=%d\n", error);
+                       "failed nvme_keep_alive_end_io error=%d\n",
+                               status);
                return;
        }
 
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
        if (nvme_keep_alive(ctrl)) {
                /* allocation failure, reset the controller */
                dev_err(ctrl->device, "keep-alive failed\n");
-               ctrl->ops->reset_ctrl(ctrl);
+               nvme_reset_ctrl(ctrl);
                return;
        }
 }
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
        struct nvme_command c = { };
        int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
        return error;
 }
 
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+       struct nvme_command c = { };
+       int status;
+       void *data;
+       int pos;
+       int len;
+
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.nsid = cpu_to_le32(nsid);
+       c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+       data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+                                     NVME_IDENTIFY_DATA_SIZE);
+       if (status)
+               goto free_data;
+
+       for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+               struct nvme_ns_id_desc *cur = data + pos;
+
+               if (cur->nidl == 0)
+                       break;
+
+               switch (cur->nidt) {
+               case NVME_NIDT_EUI64:
+                       if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_EUI64_LEN;
+                       memcpy(ns->eui, data + pos + sizeof(*cur), len);
+                       break;
+               case NVME_NIDT_NGUID:
+                       if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_NGUID_LEN;
+                       memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+                       break;
+               case NVME_NIDT_UUID:
+                       if (cur->nidl != NVME_NIDT_UUID_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_UUID_LEN;
+                       uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+                       break;
+               default:
+                       /* Skip unnkown types */
+                       len = cur->nidl;
+                       break;
+               }
+
+               len += sizeof(*cur);
+       }
+free_data:
+       kfree(data);
+       return status;
+}
+
 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
 {
        struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
 
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id)
 {
        struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
        return error;
 }
 
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-                     void *buffer, size_t buflen, u32 *result)
-{
-       struct nvme_command c;
-       union nvme_result res;
-       int ret;
-
-       memset(&c, 0, sizeof(c));
-       c.features.opcode = nvme_admin_get_features;
-       c.features.nsid = cpu_to_le32(nsid);
-       c.features.fid = cpu_to_le32(fid);
-
-       ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
-                       NVME_QID_ANY, 0, 0);
-       if (ret >= 0 && result)
-               *result = le32_to_cpu(res.u32);
-       return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                      void *buffer, size_t buflen, u32 *result)
 {
        struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
        return ret;
 }
 
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
-       struct nvme_command c = { };
-       int error;
-
-       c.common.opcode = nvme_admin_get_log_page,
-       c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-       c.common.cdw10[0] = cpu_to_le32(
-                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-                        NVME_LOG_SMART),
-
-       *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-       if (!*log)
-               return -ENOMEM;
-
-       error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-                       sizeof(struct nvme_smart_log));
-       if (error)
-               kfree(*log);
-       return error;
-}
-
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
        u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
         * access to the admin queue, as that might be only way to fix them up.
         */
        if (status > 0) {
-               dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+               dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
                *count = 0;
        } else {
                nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
-       case SG_GET_VERSION_NUM:
-               return nvme_sg_get_version_num((void __user *)arg);
-       case SG_IO:
-               return nvme_sg_io(ns, (void __user *)arg);
-#endif
        default:
 #ifdef CONFIG_NVM
                if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
 {
-       switch (cmd) {
-       case SG_IO:
-               return -ENOIOCTLCMD;
-       }
        return nvme_ioctl(bdev, mode, cmd, arg);
 }
 #else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+       u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+       blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
 static void nvme_config_discard(struct nvme_ns *ns)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       ns->queue->limits.discard_alignment = logical_block_size;
-       ns->queue->limits.discard_granularity = logical_block_size;
+       if (ctrl->nr_streams && ns->sws && ns->sgs) {
+               unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+               ns->queue->limits.discard_alignment = sz;
+               ns->queue->limits.discard_granularity = sz;
+       } else {
+               ns->queue->limits.discard_alignment = logical_block_size;
+               ns->queue->limits.discard_granularity = logical_block_size;
+       }
        blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
        blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
        if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
                memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
        if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
-               memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+               memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+       if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+                /* Don't treat error as fatal we potentially
+                 * already have a NGUID or EUI-64
+                 */
+               if (nvme_identify_ns_descs(ns, ns->ns_id))
+                       dev_warn(ns->ctrl->device,
+                                "%s: Identify Descriptors failed\n", __func__);
+       }
 
        return 0;
 }
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
        struct nvme_ns *ns = disk->private_data;
+       struct nvme_ctrl *ctrl = ns->ctrl;
        u16 bs;
 
        /*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        bs = 1 << ns->lba_shift;
+       ns->noiob = le16_to_cpu(id->noiob);
 
        blk_mq_freeze_queue(disk->queue);
 
-       if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+       if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
                nvme_prep_integrity(disk, id, bs);
        blk_queue_logical_block_size(ns->queue, bs);
+       if (ns->noiob)
+               nvme_set_chunk_size(ns);
        if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
                nvme_init_integrity(ns);
        if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        else
                set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-       if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+       if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
                nvme_config_discard(ns);
        blk_mq_unfreeze_queue(disk->queue);
 }
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
-       unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+       unsigned long timeout = jiffies + (shutdown_timeout * HZ);
        u32 csts;
        int ret;
 
@@ -1342,7 +1519,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
         * transitioning between power states.  Therefore, when running
         * in any given state, we will enter the next lower-power
         * non-operational state after waiting 50 * (enlat + exlat)
-        * microseconds, as long as that state's total latency is under
+        * microseconds, as long as that state's exit latency is under
         * the requested maximum latency.
         *
         * We will not autonomously enter any non-operational state for
@@ -1387,7 +1564,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
                 * lowest-power state, not the number of states.
                 */
                for (state = (int)ctrl->npss; state >= 0; state--) {
-                       u64 total_latency_us, transition_ms;
+                       u64 total_latency_us, exit_latency_us, transition_ms;
 
                        if (target)
                                table->entries[state] = target;
@@ -1408,12 +1585,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
                              NVME_PS_FLAGS_NON_OP_STATE))
                                continue;
 
-                       total_latency_us =
-                               (u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-                               + le32_to_cpu(ctrl->psd[state].exit_lat);
-                       if (total_latency_us > ctrl->ps_max_latency_us)
+                       exit_latency_us =
+                               (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+                       if (exit_latency_us > ctrl->ps_max_latency_us)
                                continue;
 
+                       total_latency_us =
+                               exit_latency_us +
+                               le32_to_cpu(ctrl->psd[state].entry_lat);
+
                        /*
                         * This state is good.  Use it as the APST idle
                         * target for higher power states.
@@ -1579,7 +1759,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
 
        if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
-               dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+               dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
                ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
        }
 
@@ -1607,7 +1787,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        prev_apsta = ctrl->apsta;
        if (ctrl->quirks & NVME_QUIRK_NO_APST) {
                if (force_apst && id->apsta) {
-                       dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+                       dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
                        ctrl->apsta = 1;
                } else {
                        ctrl->apsta = 0;
@@ -1631,12 +1811,14 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                        ret = -EINVAL;
 
                if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "keep-alive support is mandatory for fabrics\n");
                        ret = -EINVAL;
                }
        } else {
                ctrl->cntlid = le16_to_cpu(id->cntlid);
+               ctrl->hmpre = le32_to_cpu(id->hmpre);
+               ctrl->hmmin = le32_to_cpu(id->hmmin);
        }
 
        kfree(id);
@@ -1647,6 +1829,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
        nvme_configure_apst(ctrl);
+       nvme_configure_directives(ctrl);
 
        ctrl->identified = true;
 
@@ -1732,7 +1915,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
                dev_warn(ctrl->device, "resetting controller\n");
-               return ctrl->ops->reset_ctrl(ctrl);
+               return nvme_reset_ctrl_sync(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
        case NVME_IOCTL_RESCAN:
@@ -1758,7 +1941,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
        int ret;
 
-       ret = ctrl->ops->reset_ctrl(ctrl);
+       ret = nvme_reset_ctrl_sync(ctrl);
        if (ret < 0)
                return ret;
        return count;
@@ -1784,8 +1967,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
        int serial_len = sizeof(ctrl->serial);
        int model_len = sizeof(ctrl->model);
 
-       if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
-               return sprintf(buf, "eui.%16phN\n", ns->uuid);
+       if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+               return sprintf(buf, "eui.%16phN\n", ns->nguid);
 
        if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
                return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1800,11 +1983,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
 
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+       return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                                                                char *buf)
 {
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       return sprintf(buf, "%pU\n", ns->uuid);
+
+       /* For backward compatibility expose the NGUID to userspace if
+        * we have no UUID set
+        */
+       if (uuid_is_null(&ns->uuid)) {
+               printk_ratelimited(KERN_WARNING
+                                  "No UUID available providing old NGUID\n");
+               return sprintf(buf, "%pU\n", ns->nguid);
+       }
+       return sprintf(buf, "%pU\n", &ns->uuid);
 }
 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
 
@@ -1827,6 +2027,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
 static struct attribute *nvme_ns_attrs[] = {
        &dev_attr_wwid.attr,
        &dev_attr_uuid.attr,
+       &dev_attr_nguid.attr,
        &dev_attr_eui.attr,
        &dev_attr_nsid.attr,
        NULL,
@@ -1839,7 +2040,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
 
        if (a == &dev_attr_uuid.attr) {
-               if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+               if (uuid_is_null(&ns->uuid) ||
+                   !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+                       return 0;
+       }
+       if (a == &dev_attr_nguid.attr) {
+               if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
                        return 0;
        }
        if (a == &dev_attr_eui.attr) {
@@ -2016,6 +2222,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        return ret;
 }
 
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+       struct streams_directive_params s;
+       int ret;
+
+       if (!ctrl->nr_streams)
+               return 0;
+
+       ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+       if (ret)
+               return ret;
+
+       ns->sws = le32_to_cpu(s.sws);
+       ns->sgs = le16_to_cpu(s.sgs);
+
+       if (ns->sws) {
+               unsigned int bs = 1 << ns->lba_shift;
+
+               blk_queue_io_min(ns->queue, bs * ns->sws);
+               if (ns->sgs)
+                       blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+       }
+
+       return 0;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns *ns;
@@ -2045,6 +2277,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
+       nvme_setup_streams_ns(ctrl, ns);
 
        sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
@@ -2053,7 +2286,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_nvm_ns_supported(ns, id) &&
                                nvme_nvm_register(ns, disk_name, node)) {
-               dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+               dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
                goto out_free_id;
        }
 
@@ -2228,7 +2461,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
         * removal.
         */
        if (ctrl->state == NVME_CTRL_LIVE)
-               schedule_work(&ctrl->scan_work);
+               queue_work(nvme_wq, &ctrl->scan_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
@@ -2283,7 +2516,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                /*FALLTHRU*/
        case NVME_SC_ABORT_REQ:
                ++ctrl->event_limit;
-               schedule_work(&ctrl->async_event_work);
+               queue_work(nvme_wq, &ctrl->async_event_work);
                break;
        default:
                break;
@@ -2306,7 +2539,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
        ctrl->event_limit = NVME_NR_AERS;
-       schedule_work(&ctrl->async_event_work);
+       queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
@@ -2438,6 +2671,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
        struct nvme_ns *ns;
 
        mutex_lock(&ctrl->namespaces_mutex);
+
+       /* Forcibly unquiesce queues to avoid blocking dispatch */
+       blk_mq_unquiesce_queue(ctrl->admin_q);
+
+       /* Forcibly start all queues to avoid having stuck requests */
+       blk_mq_start_hw_queues(ctrl->admin_q);
+
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                /*
                 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2448,6 +2688,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
                revalidate_disk(ns->disk);
                blk_set_queue_dying(ns->queue);
 
+               /* Forcibly unquiesce queues to avoid blocking dispatch */
+               blk_mq_unquiesce_queue(ns->queue);
+
                /*
                 * Forcibly start all queues to avoid having stuck requests.
                 * Note that we must ensure the queues are not stopped
@@ -2526,7 +2769,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 
        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
-               blk_mq_start_stopped_hw_queues(ns->queue, true);
+               blk_mq_unquiesce_queue(ns->queue);
                blk_mq_kick_requeue_list(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
@@ -2537,10 +2780,15 @@ int __init nvme_core_init(void)
 {
        int result;
 
+       nvme_wq = alloc_workqueue("nvme-wq",
+                       WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+       if (!nvme_wq)
+               return -ENOMEM;
+
        result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
                                                        &nvme_dev_fops);
        if (result < 0)
-               return result;
+               goto destroy_wq;
        else if (result > 0)
                nvme_char_major = result;
 
@@ -2552,8 +2800,10 @@ int __init nvme_core_init(void)
 
        return 0;
 
- unregister_chrdev:
+unregister_chrdev:
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+       destroy_workqueue(nvme_wq);
        return result;
 }
 
@@ -2561,6 +2811,7 @@ void nvme_core_exit(void)
 {
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       destroy_workqueue(nvme_wq);
 }
 
 MODULE_LICENSE("GPL");