From 4bbf439b09c5ac3f8b3e9584fe080375d8d0ad2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2020 14:40:37 -0500 Subject: [PATCH 1/6] fix return values of seq_read_iter() Unlike ->read(), ->read_iter() instances *must* return the amount of data they'd left in iterator. For ->read() returning less than it has actually copied is a QoI issue; read(fd, unmapped_page - 5, 8) is allowed to fill all 5 bytes of destination and return 4; it's not nice to caller, but POSIX allows pretty much anything in such situation, up to and including a SIGSEGV. generic_file_splice_read() uses pipe-backed iterator as destination; there a short copy comes from pipe being full, not from running into an un{mapped,writable} page in the middle of destination as we have for iovec-backed iterators read(2) uses. And there we rely upon the ->read_iter() reporting the actual amount it has left in destination. Conversion of a ->read() instance into ->read_iter() has to watch out for that. If you really need an "all or nothing" kind of behaviour somewhere, you need to do iov_iter_revert() to prune the partial copy. In case of seq_read_iter() we can handle short copy just fine; the data is in m->buf and next call will fetch it from there. Fixes: d4d50710a8b4 (seq_file: add seq_read_iter) Tested-by: Nathan Chancellor Signed-off-by: Al Viro --- fs/seq_file.c | 57 ++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 3b20e21604e7..03a369ccd28c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read); ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct seq_file *m = iocb->ki_filp->private_data; - size_t size = iov_iter_count(iter); size_t copied = 0; size_t n; void *p; int err = 0; + if (!iov_iter_count(iter)) + return 0; + mutex_lock(&m->lock); /* @@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (!m->buf) goto Enomem; } - /* if not empty - flush it first */ + // something left in the buffer - copy it out first if (m->count) { - n = min(m->count, size); - if (copy_to_iter(m->buf + m->from, n, iter) != n) - goto Efault; + n = copy_to_iter(m->buf + m->from, m->count, iter); m->count -= n; m->from += n; - size -= n; copied += n; - if (!size) + if (m->count) // hadn't managed to copy everything goto Done; } - /* we need at least one record in buffer */ + // get a non-empty record in the buffer m->from = 0; p = m->op->start(m, &m->index); while (1) { err = PTR_ERR(p); - if (!p || IS_ERR(p)) + if (!p || IS_ERR(p)) // EOF or an error break; err = m->op->show(m, p); - if (err < 0) + if (err < 0) // hard error break; - if (unlikely(err)) + if (unlikely(err)) // ->show() says "skip it" m->count = 0; - if (unlikely(!m->count)) { + if (unlikely(!m->count)) { // empty record p = m->op->next(m, p, &m->index); continue; } - if (m->count < m->size) + if (!seq_has_overflowed(m)) // got it goto Fill; + // need a bigger buffer m->op->stop(m, p); kvfree(m->buf); m->count = 0; @@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter) goto Enomem; p = m->op->start(m, &m->index); } + // EOF or an error m->op->stop(m, p); m->count = 0; goto Done; Fill: - /* they want more? let's try to get some more */ + // one non-empty record is in the buffer; if they want more, + // try to fit more in, but in any case we need to advance + // the iterator once for every record shown. while (1) { size_t offs = m->count; loff_t pos = m->index; @@ -259,30 +262,27 @@ Fill: m->op->next); m->index++; } - if (!p || IS_ERR(p)) { - err = PTR_ERR(p); + if (!p || IS_ERR(p)) // no next record for us break; - } - if (m->count >= size) + if (m->count >= iov_iter_count(iter)) break; err = m->op->show(m, p); - if (seq_has_overflowed(m) || err) { + if (err > 0) { // ->show() says "skip it" m->count = offs; - if (likely(err <= 0)) - break; + } else if (err || seq_has_overflowed(m)) { + m->count = offs; + break; } } m->op->stop(m, p); - n = min(m->count, size); - if (copy_to_iter(m->buf, n, iter) != n) - goto Efault; + n = copy_to_iter(m->buf, m->count, iter); copied += n; m->count -= n; m->from = n; Done: - if (!copied) - copied = err; - else { + if (unlikely(!copied)) { + copied = m->count ? -EFAULT : err; + } else { iocb->ki_pos += copied; m->read_pos += copied; } @@ -291,9 +291,6 @@ Done: Enomem: err = -ENOMEM; goto Done; -Efault: - err = -EFAULT; - goto Done; } EXPORT_SYMBOL(seq_read_iter); From bcee5278958802b40ee8b26679155a6d9231783e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 4 Dec 2020 16:36:16 -0500 Subject: [PATCH 2/6] tracing: Fix userstacktrace option for instances When the instances were able to use their own options, the userstacktrace option was left hardcoded for the top level. This made the instance userstacktrace option bascially into a nop, and will confuse users that set it, but nothing happens (I was confused when it happened to me!) Cc: stable@vger.kernel.org Fixes: 16270145ce6b ("tracing: Add trace options for core options to instances") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7d53c5bdea3e..06134189e9a7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -163,7 +163,8 @@ static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ int tracing_set_tracer(struct trace_array *tr, const char *buf); -static void ftrace_trace_userstack(struct trace_buffer *buffer, +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 @@ -2870,7 +2871,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * two. They are not that meaningful. */ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(buffer, flags, pc); + ftrace_trace_userstack(tr, buffer, flags, pc); } /* @@ -3056,13 +3057,14 @@ EXPORT_SYMBOL_GPL(trace_dump_stack); static DEFINE_PER_CPU(int, user_stack_count); static void -ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) +ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; - if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE)) + if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE)) return; /* @@ -3101,7 +3103,8 @@ ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc) preempt_enable(); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ -static void ftrace_trace_userstack(struct trace_buffer *buffer, +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, unsigned long flags, int pc) { } From 6220e48d9640538ff700f2e7d24c2f9277555fd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Dec 2020 16:37:47 -0500 Subject: [PATCH 3/6] [regression fix] really dumb fuckup in sparc64 __csum_partial_copy() changes ~0U is -1, not 1 Reported-by: Anatoly Pugachev Tested-by: Anatoly Pugachev Fixes: fdf8bee96f9a "sparc64: propagate the calling convention changes down to __csum_partial_copy_...()" X-brown-paperbag: yes Signed-off-by: Al Viro --- arch/sparc/lib/csum_copy.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index 0c0268e77155..d839956407a7 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -71,7 +71,7 @@ FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */ LOAD(prefetch, %o0 + 0x000, #n_reads) xor %o0, %o1, %g1 - mov 1, %o3 + mov -1, %o3 clr %o4 andcc %g1, 0x3, %g0 bne,pn %icc, 95f From 1a0e1943d8798cb3241fb5edb9a836af1611b60a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 8 Dec 2020 15:00:36 -0800 Subject: [PATCH 4/6] Revert "scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug" This reverts commit 103fbf8e4020845e4fcf63819288cedb092a3c91. It turns out that it causes long boot-time latencies (to the point of timeouts and failed boots). The cause is the increase in request queues, and a fix for that is queued up for 5.11, but we're reverting this commit that triggered the problem for now. Reported-and-tested-by: John Garry Reported-and-tested-by: Julia Lawall Reported-by: Qian Cai Acked-by: Jens Axboe Acked-by: Martin K. Petersen Link: https://lore.kernel.org/linux-scsi/fe3dff7dae4494e5a88caffbb4d877bbf472dceb.camel@redhat.com/ Link: https://lore.kernel.org/lkml/alpine.DEB.2.22.394.2012081813310.2680@hadrien/ Link: https://lore.kernel.org/linux-block/20201203012638.543321-1-ming.lei@redhat.com/ Signed-off-by: Linus Torvalds --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 --------------------- drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 +++++++-------- 2 files changed, 13 insertions(+), 55 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 41cd66fc7d81..e158d3d62056 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -114,10 +113,6 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); -int host_tagset_enable = 1; -module_param(host_tagset_enable, int, 0444); -MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); - MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3124,19 +3119,6 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } -static int megasas_map_queues(struct Scsi_Host *shost) -{ - struct megasas_instance *instance; - - instance = (struct megasas_instance *)shost->hostdata; - - if (shost->nr_hw_queues == 1) - return 0; - - return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], - instance->pdev, instance->low_latency_index_start); -} - static void megasas_aen_polling(struct work_struct *work); /** @@ -3445,7 +3427,6 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, - .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6827,26 +6808,6 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; - /* Use shared host tagset only for fusion adaptors - * if there are managed interrupts (smp affinity enabled case). - * Single msix_vectors in kdump, so shared host tag is also disabled. - */ - - host->host_tagset = 0; - host->nr_hw_queues = 1; - - if ((instance->adapter_type != MFI_SERIES) && - (instance->msix_vectors > instance->low_latency_index_start) && - host_tagset_enable && - instance->smp_affinity_enable) { - host->host_tagset = 1; - host->nr_hw_queues = instance->msix_vectors - - instance->low_latency_index_start; - } - - dev_info(&instance->pdev->dev, - "Max firmware commands: %d shared with nr_hw_queues = %d\n", - instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index fd607287608e..b0c01cf0428f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,29 +359,24 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* TBD - if sml remove device_busy in future, driver - * should track counter in internal structure. - */ - sdev_busy = atomic_read(&scmd->device->device_busy); + /* nr_hw_queue = 1 for MegaRAID */ + struct blk_mq_hw_ctx *hctx = + scmd->device->request_queue->queue_hw_ctx[0]; + + sdev_busy = atomic_read(&hctx->nr_active); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - } else if (instance->msix_load_balance) { + else if (instance->msix_load_balance) cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - } else if (instance->host->nr_hw_queues > 1) { - u32 tag = blk_mq_unique_tag(scmd->request); - - cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + - instance->low_latency_index_start; - } else { + else cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; - } } /** @@ -961,6 +956,9 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; + dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", + instance->max_fw_cmds); + /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1104,9 +1102,8 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode), - instance->low_latency_index_start); + dev_info(&instance->pdev->dev, "Performance mode :%s\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode)); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; From 4cb682964706deffb4861f0a91329ab3a705039f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 8 Dec 2020 23:52:03 +0000 Subject: [PATCH 5/6] afs: Fix memory leak when mounting with multiple source parameters There's a memory leak in afs_parse_source() whereby multiple source= parameters overwrite fc->source in the fs_context struct without freeing the previously recorded source. Fix this by only permitting a single source parameter and rejecting with an error all subsequent ones. This was caught by syzbot with the kernel memory leak detector, showing something like the following trace: unreferenced object 0xffff888114375440 (size 32): comm "repro", pid 5168, jiffies 4294923723 (age 569.948s) backtrace: slab_post_alloc_hook+0x42/0x79 __kmalloc_track_caller+0x125/0x16a kmemdup_nul+0x24/0x3c vfs_parse_fs_string+0x5a/0xa1 generic_parse_monolithic+0x9d/0xc5 do_new_mount+0x10d/0x15a do_mount+0x5f/0x8e __do_sys_mount+0xff/0x127 do_syscall_64+0x2d/0x3a entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 13fcc6837049 ("afs: Add fs_context support") Reported-by: syzbot+86dc6632faaca40133ab@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Randy Dunlap Signed-off-by: Linus Torvalds --- fs/afs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index 6c5900df6aa5..e38bb1e7a4d2 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -230,6 +230,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) _enter(",%s", name); + if (fc->source) + return invalf(fc, "kAFS: Multiple sources not supported"); + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; From a68a0262abdaa251e12c53715f48e698a18ef402 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 8 Dec 2020 20:57:18 -0800 Subject: [PATCH 6/6] mm/madvise: remove racy mm ownership check Jann spotted the security hole due to race of mm ownership check. If the task is sharing the mm_struct but goes through execve() before mm_access(), it could skip process_madvise_behavior_valid check. That makes *any advice hint* to reach into the remote process. This patch removes the mm ownership check. With it, it will lose the ability that local process could give *any* advice hint with vector interface for some reason (e.g., performance). Since there is no concrete example in upstream yet, it would be better to remove the abiliity at this moment and need to review when such new advice comes up. Fixes: ecb8ac8b1f14 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") Reported-by: Jann Horn Suggested-by: Jann Horn Signed-off-by: Minchan Kim Signed-off-by: Linus Torvalds --- mm/madvise.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index a8d8d48a57fe..13f5677b9322 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1204,8 +1204,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, goto put_pid; } - if (task->mm != current->mm && - !process_madvise_behavior_valid(behavior)) { + if (!process_madvise_behavior_valid(behavior)) { ret = -EINVAL; goto release_task; }