From 4bbf439b09c5ac3f8b3e9584fe080375d8d0ad2d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Nov 2020 14:40:37 -0500
Subject: [PATCH 1/6] fix return values of seq_read_iter()

Unlike ->read(), ->read_iter() instances *must* return the amount
of data they'd left in iterator.  For ->read() returning less than
it has actually copied is a QoI issue; read(fd, unmapped_page - 5, 8)
is allowed to fill all 5 bytes of destination and return 4; it's
not nice to caller, but POSIX allows pretty much anything in such
situation, up to and including a SIGSEGV.

generic_file_splice_read() uses pipe-backed iterator as destination;
there a short copy comes from pipe being full, not from running into
an un{mapped,writable} page in the middle of destination as we
have for iovec-backed iterators read(2) uses.  And there we rely
upon the ->read_iter() reporting the actual amount it has left
in destination.

Conversion of a ->read() instance into ->read_iter() has to watch
out for that.  If you really need an "all or nothing" kind of
behaviour somewhere, you need to do iov_iter_revert() to prune
the partial copy.

In case of seq_read_iter() we can handle short copy just fine;
the data is in m->buf and next call will fetch it from there.

Fixes: d4d50710a8b4 (seq_file: add seq_read_iter)
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/seq_file.c | 57 ++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..03a369ccd28c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
 ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct seq_file *m = iocb->ki_filp->private_data;
-	size_t size = iov_iter_count(iter);
 	size_t copied = 0;
 	size_t n;
 	void *p;
 	int err = 0;
 
+	if (!iov_iter_count(iter))
+		return 0;
+
 	mutex_lock(&m->lock);
 
 	/*
@@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		if (!m->buf)
 			goto Enomem;
 	}
-	/* if not empty - flush it first */
+	// something left in the buffer - copy it out first
 	if (m->count) {
-		n = min(m->count, size);
-		if (copy_to_iter(m->buf + m->from, n, iter) != n)
-			goto Efault;
+		n = copy_to_iter(m->buf + m->from, m->count, iter);
 		m->count -= n;
 		m->from += n;
-		size -= n;
 		copied += n;
-		if (!size)
+		if (m->count)	// hadn't managed to copy everything
 			goto Done;
 	}
-	/* we need at least one record in buffer */
+	// get a non-empty record in the buffer
 	m->from = 0;
 	p = m->op->start(m, &m->index);
 	while (1) {
 		err = PTR_ERR(p);
-		if (!p || IS_ERR(p))
+		if (!p || IS_ERR(p))	// EOF or an error
 			break;
 		err = m->op->show(m, p);
-		if (err < 0)
+		if (err < 0)		// hard error
 			break;
-		if (unlikely(err))
+		if (unlikely(err))	// ->show() says "skip it"
 			m->count = 0;
-		if (unlikely(!m->count)) {
+		if (unlikely(!m->count)) { // empty record
 			p = m->op->next(m, p, &m->index);
 			continue;
 		}
-		if (m->count < m->size)
+		if (!seq_has_overflowed(m)) // got it
 			goto Fill;
+		// need a bigger buffer
 		m->op->stop(m, p);
 		kvfree(m->buf);
 		m->count = 0;
@@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 			goto Enomem;
 		p = m->op->start(m, &m->index);
 	}
+	// EOF or an error
 	m->op->stop(m, p);
 	m->count = 0;
 	goto Done;
 Fill:
-	/* they want more? let's try to get some more */
+	// one non-empty record is in the buffer; if they want more,
+	// try to fit more in, but in any case we need to advance
+	// the iterator once for every record shown.
 	while (1) {
 		size_t offs = m->count;
 		loff_t pos = m->index;
@@ -259,30 +262,27 @@ Fill:
 					    m->op->next);
 			m->index++;
 		}
-		if (!p || IS_ERR(p)) {
-			err = PTR_ERR(p);
+		if (!p || IS_ERR(p))	// no next record for us
 			break;
-		}
-		if (m->count >= size)
+		if (m->count >= iov_iter_count(iter))
 			break;
 		err = m->op->show(m, p);
-		if (seq_has_overflowed(m) || err) {
+		if (err > 0) {		// ->show() says "skip it"
 			m->count = offs;
-			if (likely(err <= 0))
-				break;
+		} else if (err || seq_has_overflowed(m)) {
+			m->count = offs;
+			break;
 		}
 	}
 	m->op->stop(m, p);
-	n = min(m->count, size);
-	if (copy_to_iter(m->buf, n, iter) != n)
-		goto Efault;
+	n = copy_to_iter(m->buf, m->count, iter);
 	copied += n;
 	m->count -= n;
 	m->from = n;
 Done:
-	if (!copied)
-		copied = err;
-	else {
+	if (unlikely(!copied)) {
+		copied = m->count ? -EFAULT : err;
+	} else {
 		iocb->ki_pos += copied;
 		m->read_pos += copied;
 	}
@@ -291,9 +291,6 @@ Done:
 Enomem:
 	err = -ENOMEM;
 	goto Done;
-Efault:
-	err = -EFAULT;
-	goto Done;
 }
 EXPORT_SYMBOL(seq_read_iter);
 

From bcee5278958802b40ee8b26679155a6d9231783e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 4 Dec 2020 16:36:16 -0500
Subject: [PATCH 2/6] tracing: Fix userstacktrace option for instances

When the instances were able to use their own options, the userstacktrace
option was left hardcoded for the top level. This made the instance
userstacktrace option bascially into a nop, and will confuse users that set
it, but nothing happens (I was confused when it happened to me!)

Cc: stable@vger.kernel.org
Fixes: 16270145ce6b ("tracing: Add trace options for core options to instances")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7d53c5bdea3e..06134189e9a7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -163,7 +163,8 @@ static union trace_eval_map_item *trace_eval_maps;
 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 
 int tracing_set_tracer(struct trace_array *tr, const char *buf);
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+				   struct trace_buffer *buffer,
 				   unsigned long flags, int pc);
 
 #define MAX_TRACER_SIZE		100
@@ -2870,7 +2871,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 	 * two. They are not that meaningful.
 	 */
 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
-	ftrace_trace_userstack(buffer, flags, pc);
+	ftrace_trace_userstack(tr, buffer, flags, pc);
 }
 
 /*
@@ -3056,13 +3057,14 @@ EXPORT_SYMBOL_GPL(trace_dump_stack);
 static DEFINE_PER_CPU(int, user_stack_count);
 
 static void
-ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
+ftrace_trace_userstack(struct trace_array *tr,
+		       struct trace_buffer *buffer, unsigned long flags, int pc)
 {
 	struct trace_event_call *call = &event_user_stack;
 	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 
-	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
+	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
 		return;
 
 	/*
@@ -3101,7 +3103,8 @@ ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
 	preempt_enable();
 }
 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+				   struct trace_buffer *buffer,
 				   unsigned long flags, int pc)
 {
 }

From 6220e48d9640538ff700f2e7d24c2f9277555fd6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Dec 2020 16:37:47 -0500
Subject: [PATCH 3/6] [regression fix] really dumb fuckup in sparc64
 __csum_partial_copy() changes

~0U is -1, not 1

Reported-by: Anatoly Pugachev <matorola@gmail.com>
Tested-by: Anatoly Pugachev <matorola@gmail.com>
Fixes: fdf8bee96f9a "sparc64: propagate the calling convention changes down to __csum_partial_copy_...()"
X-brown-paperbag: yes
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/sparc/lib/csum_copy.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index 0c0268e77155..d839956407a7 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S
@@ -71,7 +71,7 @@
 FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len */
 	LOAD(prefetch, %o0 + 0x000, #n_reads)
 	xor		%o0, %o1, %g1
-	mov		1, %o3
+	mov		-1, %o3
 	clr		%o4
 	andcc		%g1, 0x3, %g0
 	bne,pn		%icc, 95f

From 1a0e1943d8798cb3241fb5edb9a836af1611b60a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 8 Dec 2020 15:00:36 -0800
Subject: [PATCH 4/6] Revert "scsi: megaraid_sas: Added support for shared host
 tagset for cpuhotplug"

This reverts commit 103fbf8e4020845e4fcf63819288cedb092a3c91.

It turns out that it causes long boot-time latencies (to the point of
timeouts and failed boots).

The cause is the increase in request queues, and a fix for that is
queued up for 5.11, but we're reverting this commit that triggered the
problem for now.

Reported-and-tested-by: John Garry <john.garry@huawei.com>
Reported-and-tested-by: Julia Lawall <julia.lawall@inria.fr>
Reported-by: Qian Cai <cai@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/linux-scsi/fe3dff7dae4494e5a88caffbb4d877bbf472dceb.camel@redhat.com/
Link: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2012081813310.2680@hadrien/
Link: https://lore.kernel.org/linux-block/20201203012638.543321-1-ming.lei@redhat.com/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_base.c   | 39 ---------------------
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 +++++++--------
 2 files changed, 13 insertions(+), 55 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 41cd66fc7d81..e158d3d62056 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -37,7 +37,6 @@
 #include <linux/poll.h>
 #include <linux/vmalloc.h>
 #include <linux/irq_poll.h>
-#include <linux/blk-mq-pci.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -114,10 +113,6 @@ unsigned int enable_sdev_max_qd;
 module_param(enable_sdev_max_qd, int, 0444);
 MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0");
 
-int host_tagset_enable = 1;
-module_param(host_tagset_enable, int, 0444);
-MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)");
-
 MODULE_LICENSE("GPL");
 MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -3124,19 +3119,6 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev,
 	return 0;
 }
 
-static int megasas_map_queues(struct Scsi_Host *shost)
-{
-	struct megasas_instance *instance;
-
-	instance = (struct megasas_instance *)shost->hostdata;
-
-	if (shost->nr_hw_queues == 1)
-		return 0;
-
-	return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
-			instance->pdev, instance->low_latency_index_start);
-}
-
 static void megasas_aen_polling(struct work_struct *work);
 
 /**
@@ -3445,7 +3427,6 @@ static struct scsi_host_template megasas_template = {
 	.eh_timed_out = megasas_reset_timer,
 	.shost_attrs = megaraid_host_attrs,
 	.bios_param = megasas_bios_param,
-	.map_queues = megasas_map_queues,
 	.change_queue_depth = scsi_change_queue_depth,
 	.max_segment_size = 0xffffffff,
 };
@@ -6827,26 +6808,6 @@ static int megasas_io_attach(struct megasas_instance *instance)
 	host->max_lun = MEGASAS_MAX_LUN;
 	host->max_cmd_len = 16;
 
-	/* Use shared host tagset only for fusion adaptors
-	 * if there are managed interrupts (smp affinity enabled case).
-	 * Single msix_vectors in kdump, so shared host tag is also disabled.
-	 */
-
-	host->host_tagset = 0;
-	host->nr_hw_queues = 1;
-
-	if ((instance->adapter_type != MFI_SERIES) &&
-		(instance->msix_vectors > instance->low_latency_index_start) &&
-		host_tagset_enable &&
-		instance->smp_affinity_enable) {
-		host->host_tagset = 1;
-		host->nr_hw_queues = instance->msix_vectors -
-			instance->low_latency_index_start;
-	}
-
-	dev_info(&instance->pdev->dev,
-		"Max firmware commands: %d shared with nr_hw_queues = %d\n",
-		instance->max_fw_cmds, host->nr_hw_queues);
 	/*
 	 * Notify the mid-layer about the new controller
 	 */
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index fd607287608e..b0c01cf0428f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -359,29 +359,24 @@ megasas_get_msix_index(struct megasas_instance *instance,
 {
 	int sdev_busy;
 
-	/* TBD - if sml remove device_busy in future, driver
-	 * should track counter in internal structure.
-	 */
-	sdev_busy = atomic_read(&scmd->device->device_busy);
+	/* nr_hw_queue = 1 for MegaRAID */
+	struct blk_mq_hw_ctx *hctx =
+		scmd->device->request_queue->queue_hw_ctx[0];
+
+	sdev_busy = atomic_read(&hctx->nr_active);
 
 	if (instance->perf_mode == MR_BALANCED_PERF_MODE &&
-	    sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) {
+	    sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH))
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) /
 					MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start);
-	} else if (instance->msix_load_balance) {
+	else if (instance->msix_load_balance)
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			(mega_mod64(atomic64_add_return(1, &instance->total_io_count),
 				instance->msix_vectors));
-	} else if (instance->host->nr_hw_queues > 1) {
-		u32 tag = blk_mq_unique_tag(scmd->request);
-
-		cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) +
-			instance->low_latency_index_start;
-	} else {
+	else
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			instance->reply_map[raw_smp_processor_id()];
-	}
 }
 
 /**
@@ -961,6 +956,9 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance)
 	if (megasas_alloc_cmdlist_fusion(instance))
 		goto fail_exit;
 
+	dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n",
+		 instance->max_fw_cmds);
+
 	/* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */
 	io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
 	io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
@@ -1104,9 +1102,8 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 		MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing)
 		instance->perf_mode = MR_BALANCED_PERF_MODE;
 
-	dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n",
-		MEGASAS_PERF_MODE_2STR(instance->perf_mode),
-		instance->low_latency_index_start);
+	dev_info(&instance->pdev->dev, "Performance mode :%s\n",
+		MEGASAS_PERF_MODE_2STR(instance->perf_mode));
 
 	instance->fw_sync_cache_support = (scratch_pad_1 &
 		MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0;

From 4cb682964706deffb4861f0a91329ab3a705039f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 8 Dec 2020 23:52:03 +0000
Subject: [PATCH 5/6] afs: Fix memory leak when mounting with multiple source
 parameters

There's a memory leak in afs_parse_source() whereby multiple source=
parameters overwrite fc->source in the fs_context struct without freeing
the previously recorded source.

Fix this by only permitting a single source parameter and rejecting with
an error all subsequent ones.

This was caught by syzbot with the kernel memory leak detector, showing
something like the following trace:

  unreferenced object 0xffff888114375440 (size 32):
    comm "repro", pid 5168, jiffies 4294923723 (age 569.948s)
    backtrace:
      slab_post_alloc_hook+0x42/0x79
      __kmalloc_track_caller+0x125/0x16a
      kmemdup_nul+0x24/0x3c
      vfs_parse_fs_string+0x5a/0xa1
      generic_parse_monolithic+0x9d/0xc5
      do_new_mount+0x10d/0x15a
      do_mount+0x5f/0x8e
      __do_sys_mount+0xff/0x127
      do_syscall_64+0x2d/0x3a
      entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 13fcc6837049 ("afs: Add fs_context support")
Reported-by: syzbot+86dc6632faaca40133ab@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/super.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/afs/super.c b/fs/afs/super.c
index 6c5900df6aa5..e38bb1e7a4d2 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -230,6 +230,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
 
 	_enter(",%s", name);
 
+	if (fc->source)
+		return invalf(fc, "kAFS: Multiple sources not supported");
+
 	if (!name) {
 		printk(KERN_ERR "kAFS: no volume name specified\n");
 		return -EINVAL;

From a68a0262abdaa251e12c53715f48e698a18ef402 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 8 Dec 2020 20:57:18 -0800
Subject: [PATCH 6/6] mm/madvise: remove racy mm ownership check

Jann spotted the security hole due to race of mm ownership check.

If the task is sharing the mm_struct but goes through execve() before
mm_access(), it could skip process_madvise_behavior_valid check.  That
makes *any advice hint* to reach into the remote process.

This patch removes the mm ownership check.  With it, it will lose the
ability that local process could give *any* advice hint with vector
interface for some reason (e.g., performance).  Since there is no
concrete example in upstream yet, it would be better to remove the
abiliity at this moment and need to review when such new advice comes
up.

Fixes: ecb8ac8b1f14 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API")
Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/madvise.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index a8d8d48a57fe..13f5677b9322 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1204,8 +1204,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 		goto put_pid;
 	}
 
-	if (task->mm != current->mm &&
-			!process_madvise_behavior_valid(behavior)) {
+	if (!process_madvise_behavior_valid(behavior)) {
 		ret = -EINVAL;
 		goto release_task;
 	}