From aa23317d0268b309bb3f0801ddd0d61813ff5afb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Feb 2024 23:58:42 -0500 Subject: [PATCH 01/30] qibfs: fix dentry leak simple_recursive_removal() drops the pinning references to all positives in subtree. For the cases when its argument has been kept alive by the pinning alone that's exactly the right thing to do, but here the argument comes from dcache lookup, that needs to be balanced by explicit dput(). Fixes: e41d237818598 "qib_fs: switch to simple_recursive_removal()" Fucked-up-by: Al Viro Signed-off-by: Al Viro --- drivers/infiniband/hw/qib/qib_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 455e966eeff3..b27791029fa9 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -439,6 +439,7 @@ static int remove_device_files(struct super_block *sb, return PTR_ERR(dir); } simple_recursive_removal(dir, NULL); + dput(dir); return 0; } From d7ed5232f0f16181506373d73e711190d5e0c868 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Sun, 31 Mar 2024 23:38:17 +0900 Subject: [PATCH 02/30] exfat: fix timing of synchronizing bitmap and inode Commit(f55c096f62f1 exfat: do not zero the extended part) changed the timing of synchronizing bitmap and inode in exfat_cont_expand(). The change caused xfstests generic/013 to fail if 'dirsync' or 'sync' is enabled. So this commit restores the timing. 
Fixes: f55c096f62f1 ("exfat: do not zero the extended part") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index cc00f1a7a1e1..9adfc38ca7da 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -51,7 +51,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) clu.flags = ei->flags; ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters, - &clu, IS_DIRSYNC(inode)); + &clu, inode_needs_sync(inode)); if (ret) return ret; @@ -77,12 +77,11 @@ out: ei->i_size_aligned = round_up(size, sb->s_blocksize); ei->i_size_ondisk = ei->i_size_aligned; inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; - - if (IS_DIRSYNC(inode)) - return write_inode_now(inode, 1); - mark_inode_dirty(inode); + if (IS_SYNC(inode)) + return write_inode_now(inode, 1); + return 0; free_clu: From aef8acd79f363ced098cd3bcde0a5978a52607ad Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 22 Apr 2024 09:40:08 +0300 Subject: [PATCH 03/30] fuse: verify zero padding in fuse_backing_map To allow us extending the interface in the future. 
Fixes: 44350256ab94 ("fuse: implement ioctls to manage backing files") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/fuse/passthrough.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 1567f0323858..9666d13884ce 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -225,7 +225,7 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) goto out; res = -EINVAL; - if (map->flags) + if (map->flags || map->padding) goto out; file = fget(map->fd); From f19257997d9c31864b4cf3fcef6e2d2b9ede148d Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 12 Jan 2024 14:48:46 +0800 Subject: [PATCH 04/30] exfat: zero the reserved fields of file and stream extension dentries From exFAT specification, the reserved fields should initialize to zero and should not use for any purpose. If create a new dentry set in the UNUSED dentries, all fields had been zeroed when allocating cluster to parent directory. But if create a new dentry set in the DELETED dentries, the reserved fields in file and stream extension dentries may be non-zero. Because only the valid bit of the type field of the dentry is cleared in exfat_remove_entries(), if the type of dentry is different from the original(For example, a dentry that was originally a file name dentry, then set to deleted dentry, and then set as a file dentry), the reserved fields is non-zero. So this commit initializes the dentry to 0 before creating file dentry and stream extension dentry. 
Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 077944d3c2c0..84572e11cc05 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -420,6 +420,7 @@ static void exfat_set_entry_type(struct exfat_dentry *ep, unsigned int type) static void exfat_init_stream_entry(struct exfat_dentry *ep, unsigned int start_clu, unsigned long long size) { + memset(ep, 0, sizeof(*ep)); exfat_set_entry_type(ep, TYPE_STREAM); if (size == 0) ep->dentry.stream.flags = ALLOC_FAT_CHAIN; @@ -457,6 +458,7 @@ void exfat_init_dir_entry(struct exfat_entry_set_cache *es, struct exfat_dentry *ep; ep = exfat_get_dentry_cached(es, ES_IDX_FILE); + memset(ep, 0, sizeof(*ep)); exfat_set_entry_type(ep, type); exfat_set_entry_time(sbi, ts, &ep->dentry.file.create_tz, From bc642d7bfdac3bfd838a1cd6651955ae2eb8535a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 24 Apr 2024 16:57:01 +0900 Subject: [PATCH 05/30] ksmbd: fix uninitialized symbol 'share' in smb2_tree_connect() Fix uninitialized symbol 'share' in smb2_tree_connect(). 
Fixes: e9d8c2f95ab8 ("ksmbd: add continuous availability share parameter") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 355824151c2d..30229161b346 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1926,7 +1926,7 @@ int smb2_tree_connect(struct ksmbd_work *work) struct ksmbd_session *sess = work->sess; char *treename = NULL, *name = NULL; struct ksmbd_tree_conn_status status; - struct ksmbd_share_config *share; + struct ksmbd_share_config *share = NULL; int rc = -EINVAL; WORK_BUFFERS(work, req, rsp); @@ -1988,7 +1988,7 @@ int smb2_tree_connect(struct ksmbd_work *work) write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: - if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && + if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && share && test_share_config_flag(share, KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY; From cc00bc83f26eb8f2d8d9f56b949b62fd774d8432 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:41:50 +0900 Subject: [PATCH 06/30] ksmbd: off ipv6only for both ipv4/ipv6 binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ΕΛΕΝΗ reported that ksmbd binds to the IPV6 wildcard (::) by default for ipv4 and ipv6 binding. So IPV4 connections are successful only when the Linux system parameter bindv6only is set to 0 [default value]. If this parameter is set to 1, then the ipv6 wildcard only represents any IPV6 address. Samba creates different sockets for ipv4 and ipv6 by default. This patch off sk_ipv6only to support IPV4/IPV6 connections without creating two sockets. 
Cc: stable@vger.kernel.org Reported-by: ΕΛΕΝΗ ΤΖΑΒΕΛΛΑ Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_tcp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 002a3f0dc7c5..6633fa78e9b9 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -448,6 +448,10 @@ static int create_socket(struct interface *iface) sin6.sin6_family = PF_INET6; sin6.sin6_addr = in6addr_any; sin6.sin6_port = htons(server_conf.tcp_port); + + lock_sock(ksmbd_socket->sk); + ksmbd_socket->sk->sk_ipv6only = false; + release_sock(ksmbd_socket->sk); } ksmbd_tcp_nodelay(ksmbd_socket); From 97c2ec64667bacc49881d2b2dd9afd4d1c3fbaeb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:44:02 +0900 Subject: [PATCH 07/30] ksmbd: avoid to send duplicate lease break notifications This patch fixes generic/011 when enable smb2 leases. if ksmbd sends multiple notifications for a file, cifs increments the reference count of the file but it does not decrement the count by the failure of queue_work. So even if the file is closed, cifs does not send a SMB2_CLOSE request. 
Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 4978edfb15f9..6fd8cb7064dc 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -613,13 +613,23 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level) if (opinfo->op_state == OPLOCK_CLOSING) return -ENOENT; - else if (!opinfo->is_lease && opinfo->level <= req_op_level) - return 1; + else if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) + return 1; + } } - if (!opinfo->is_lease && opinfo->level <= req_op_level) { - wake_up_oplock_break(opinfo); - return 1; + if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) { + wake_up_oplock_break(opinfo); + return 1; + } } return 0; } @@ -887,7 +897,6 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) struct lease *lease = brk_opinfo->o_lease; atomic_inc(&brk_opinfo->breaking_cnt); - err = oplock_break_pending(brk_opinfo, req_op_level); if (err) return err < 0 ? err : 0; From d1c189c6cb8b0fb7b5ee549237d27889c40c2f8b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 2 May 2024 10:07:50 +0900 Subject: [PATCH 08/30] ksmbd: use rwsem instead of rwlock for lease break lease break wait for lease break acknowledgment. rwsem is more suitable than unlock while traversing the list for parent lease break in ->m_op_list. 
Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 30 +++++++++++------------------- fs/smb/server/smb2pdu.c | 4 ++-- fs/smb/server/smb_common.c | 4 ++-- fs/smb/server/vfs_cache.c | 28 ++++++++++++++-------------- fs/smb/server/vfs_cache.h | 2 +- 5 files changed, 30 insertions(+), 38 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 6fd8cb7064dc..c2abf109010d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -207,9 +207,9 @@ static void opinfo_add(struct oplock_info *opinfo) { struct ksmbd_inode *ci = opinfo->o_fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_add_rcu(&opinfo->op_entry, &ci->m_op_list); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); } static void opinfo_del(struct oplock_info *opinfo) @@ -221,9 +221,9 @@ static void opinfo_del(struct oplock_info *opinfo) lease_del_list(opinfo); write_unlock(&lease_list_lock); } - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_del_rcu(&opinfo->op_entry); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); } static unsigned long opinfo_count(struct ksmbd_file *fp) @@ -526,21 +526,18 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, * Compare lease key and client_guid to know request from same owner * of same client */ - read_lock(&ci->m_lock); + down_read(&ci->m_lock); list_for_each_entry(opinfo, &ci->m_op_list, op_entry) { if (!opinfo->is_lease || !opinfo->conn) continue; - read_unlock(&ci->m_lock); lease = opinfo->o_lease; ret = compare_guid_key(opinfo, client_guid, lctx->lease_key); if (ret) { m_opinfo = opinfo; /* skip upgrading lease about breaking lease */ - if (atomic_read(&opinfo->breaking_cnt)) { - read_lock(&ci->m_lock); + if (atomic_read(&opinfo->breaking_cnt)) continue; - } /* upgrading lease */ if ((atomic_read(&ci->op_count) + @@ -570,9 +567,8 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, 
lease_none_upgrade(opinfo, lctx->req_state); } } - read_lock(&ci->m_lock); } - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return m_opinfo; } @@ -1114,7 +1110,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (!p_ci) return; - read_lock(&p_ci->m_lock); + down_read(&p_ci->m_lock); list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { if (opinfo->conn == NULL || !opinfo->is_lease) continue; @@ -1132,13 +1128,11 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, continue; } - read_unlock(&p_ci->m_lock); oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); opinfo_conn_put(opinfo); - read_lock(&p_ci->m_lock); } } - read_unlock(&p_ci->m_lock); + up_read(&p_ci->m_lock); ksmbd_inode_put(p_ci); } @@ -1159,7 +1153,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (!p_ci) return; - read_lock(&p_ci->m_lock); + down_read(&p_ci->m_lock); list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { if (opinfo->conn == NULL || !opinfo->is_lease) continue; @@ -1173,13 +1167,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) atomic_dec(&opinfo->conn->r_count); continue; } - read_unlock(&p_ci->m_lock); oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); opinfo_conn_put(opinfo); - read_lock(&p_ci->m_lock); } } - read_unlock(&p_ci->m_lock); + up_read(&p_ci->m_lock); ksmbd_inode_put(p_ci); } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 30229161b346..b6c5a8ea3887 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3376,9 +3376,9 @@ int smb2_open(struct ksmbd_work *work) * after daccess, saccess, attrib_only, and stream are * initialized. 
*/ - write_lock(&fp->f_ci->m_lock); + down_write(&fp->f_ci->m_lock); list_add(&fp->node, &fp->f_ci->m_fp_list); - write_unlock(&fp->f_ci->m_lock); + up_write(&fp->f_ci->m_lock); /* Check delete pending among previous fp before oplock break */ if (ksmbd_inode_pending_delete(fp)) { diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index fcaf373cc008..474dadf6b7b8 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -646,7 +646,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) * Lookup fp in master fp list, and check desired access and * shared mode between previous open and current open. */ - read_lock(&curr_fp->f_ci->m_lock); + down_read(&curr_fp->f_ci->m_lock); list_for_each_entry(prev_fp, &curr_fp->f_ci->m_fp_list, node) { if (file_inode(filp) != file_inode(prev_fp->filp)) continue; @@ -722,7 +722,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) break; } } - read_unlock(&curr_fp->f_ci->m_lock); + up_read(&curr_fp->f_ci->m_lock); return rc; } diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 030f70700036..6cb599cd287e 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -165,7 +165,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) ci->m_fattr = 0; INIT_LIST_HEAD(&ci->m_fp_list); INIT_LIST_HEAD(&ci->m_op_list); - rwlock_init(&ci->m_lock); + init_rwsem(&ci->m_lock); ci->m_de = fp->filp->f_path.dentry; return 0; } @@ -261,14 +261,14 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) } if (atomic_dec_and_test(&ci->m_count)) { - write_lock(&ci->m_lock); + down_write(&ci->m_lock); if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) { ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); ksmbd_vfs_unlink(filp); - write_lock(&ci->m_lock); + down_write(&ci->m_lock); } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); 
ksmbd_inode_free(ci); } @@ -289,9 +289,9 @@ static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp if (!has_file_id(fp->volatile_id)) return; - write_lock(&fp->f_ci->m_lock); + down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); - write_unlock(&fp->f_ci->m_lock); + up_write(&fp->f_ci->m_lock); write_lock(&ft->lock); idr_remove(ft->idr, fp->volatile_id); @@ -523,17 +523,17 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) if (!ci) return NULL; - read_lock(&ci->m_lock); + down_read(&ci->m_lock); list_for_each_entry(lfp, &ci->m_fp_list, node) { if (inode == file_inode(lfp->filp)) { atomic_dec(&ci->m_count); lfp = ksmbd_fp_get(lfp); - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return lfp; } } atomic_dec(&ci->m_count); - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return NULL; } @@ -705,13 +705,13 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, conn = fp->conn; ci = fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn != conn) continue; op->conn = NULL; } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); fp->conn = NULL; fp->tcon = NULL; @@ -801,13 +801,13 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) fp->tcon = work->tcon; ci = fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn) continue; op->conn = fp->conn; } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index ed44fb4e18e7..5a225e7055f1 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -47,7 +47,7 @@ struct stream { }; struct ksmbd_inode { - rwlock_t m_lock; + struct rw_semaphore m_lock; atomic_t m_count; atomic_t op_count; /* opinfo count for 
streams */ From 691aae4f36f9825df6781da4399a1e718951085a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 May 2024 21:58:15 +0900 Subject: [PATCH 09/30] ksmbd: do not grant v2 lease if parent lease key and epoch are not set This patch fix xfstests generic/070 test with smb2 leases = yes. cifs.ko doesn't set parent lease key and epoch in create context v2 lease. ksmbd suppose that parent lease and epoch are valid if data length is v2 lease context size and handle directory lease using these values. ksmbd should handle it as v1 lease not v2 lease if parent lease key and epoch are not set in create context v2 lease. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index c2abf109010d..b9d9116fc2b3 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1201,7 +1201,9 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, /* Only v2 leases handle the directory */ if (S_ISDIR(file_inode(fp->filp)->i_mode)) { - if (!lctx || lctx->version != 2) + if (!lctx || lctx->version != 2 || + (lctx->flags != SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE && + !lctx->epoch)) return 0; } @@ -1466,8 +1468,9 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) buf->lcontext.LeaseFlags = lease->flags; buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; - memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, - SMB2_LEASE_KEY_SIZE); + if (lease->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, + SMB2_LEASE_KEY_SIZE); buf->ccontext.DataOffset = cpu_to_le16(offsetof (struct create_lease_v2, lcontext)); buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2)); @@ -1526,8 +1529,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->flags 
= lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; - memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, - SMB2_LEASE_KEY_SIZE); + if (lreq->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, + SMB2_LEASE_KEY_SIZE); lreq->version = 2; } else { struct create_lease *lc = (struct create_lease *)cc; From 72e71bf0298c7ed985bcd0d3c7ff4ca19de60373 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 10:14:13 -0400 Subject: [PATCH 10/30] bcachefs: Fix a scheduler splat in __bch2_next_write_buffer_flush_journal_buf() We're using mutex_lock() inside a wait_event() conditional - prepare_to_wait() has already flipped task state, so potentially blocking ops need annotation. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 9c9a25dbd613..9c2af544251a 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -870,6 +870,8 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou { struct journal_buf *ret = NULL; + /* We're inside wait_event(), but using mutex_lock(: */ + sched_annotate_sleep(); mutex_lock(&j->buf_lock); spin_lock(&j->lock); max_seq = min(max_seq, journal_cur_seq(j)); From 7ffec9ccdc6ad8356792f9a7823b1fe9c8a10cbf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 10:55:17 -0400 Subject: [PATCH 11/30] bcachefs: don't free error pointers Reported-by: syzbot+3333603f569fc2ef258c@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index be5b47619327..8091d0686029 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -902,7 +902,8 @@ out: bch2_journal_keys_put_initial(c); 
bch2_find_btree_nodes_exit(&c->found_btree_nodes); } - kfree(clean); + if (!IS_ERR(clean)) + kfree(clean); if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && From a2ddaf965f6a15c316f483e7446fbe3d81fba27c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:06:54 -0400 Subject: [PATCH 12/30] bcachefs: bucket_pos_to_bp_noerror() We don't want the assert when we're checking if the backpointer is valid. Reported-by: syzbot+bf7215c0525098e7747a@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 2 +- fs/bcachefs/backpointers.h | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index a20044201002..af7a71de1bdf 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -54,7 +54,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, int ret = 0; bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || - !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + !bpos_eq(bp.k->p, bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset)), c, err, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 85949b9fd880..c1b274eadda1 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -45,6 +45,15 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c, return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); } +static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca, + struct bpos bucket, + u64 bucket_offset) +{ + return POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); +} + /* * Convert from pos in alloc btree + bucket offset to pos in backpointer btree: */ @@ -53,10 +62,7 @@ static inline struct bpos 
bucket_pos_to_bp(const struct bch_fs *c, u64 bucket_offset) { struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); - struct bpos ret = POS(bucket.inode, - (bucket_to_sector(ca, bucket.offset) << - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); - + struct bpos ret = bucket_pos_to_bp_noerror(ca, bucket, bucket_offset); EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); return ret; } From b30b70ad8bffcde513d34d525820ec411f48e3d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:39:53 -0400 Subject: [PATCH 13/30] bcachefs: Fix early error path in bch2_fs_btree_key_cache_exit() Reported-by: syzbot+a35cdb62ec34d44fb062@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index e8c1c530cd95..7dafa1accec2 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -956,13 +956,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) } #ifdef __KERNEL__ - for_each_possible_cpu(cpu) { - struct btree_key_cache_freelist *f = - per_cpu_ptr(bc->pcpu_freed, cpu); + if (bc->pcpu_freed) { + for_each_possible_cpu(cpu) { + struct btree_key_cache_freelist *f = + per_cpu_ptr(bc->pcpu_freed, cpu); - for (i = 0; i < f->nr; i++) { - ck = f->objs[i]; - list_add(&ck->list, &items); + for (i = 0; i < f->nr; i++) { + ck = f->objs[i]; + list_add(&ck->list, &items); + } } } #endif From 4a8521b6bb81abba9d80d60b80908c77c9236ced Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 11:31:22 -0400 Subject: [PATCH 14/30] bcachefs: Inodes need extra padding for varint_decode_fast() Reported-by: syzbot+66b9b74f6520068596a9@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/io_write.c 
b/fs/bcachefs/io_write.c index f137252bccc5..b72cf31f7274 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -199,9 +199,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, u64 new_i_size, s64 i_sectors_delta) { - struct btree_iter iter; - struct bkey_i *k; - struct bkey_i_inode_v3 *inode; /* * Crazy performance optimization: * Every extent update needs to also update the inode: the inode trigger @@ -214,25 +211,36 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, * lost, but that's fine. */ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; - int ret; - k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes, + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, SPOS(0, extent_iter->pos.inode, extent_iter->snapshot), BTREE_ITER_CACHED); - ret = PTR_ERR_OR_ZERO(k); + int ret = bkey_err(k); if (unlikely(ret)) return ret; - if (unlikely(k->k.type != KEY_TYPE_inode_v3)) { - k = bch2_inode_to_v3(trans, k); - ret = PTR_ERR_OR_ZERO(k); + /* + * varint_decode_fast(), in the inode .invalid method, reads up to 7 + * bytes past the end of the buffer: + */ + struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8); + ret = PTR_ERR_OR_ZERO(k_mut); + if (unlikely(ret)) + goto err; + + bkey_reassemble(k_mut, k); + + if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) { + k_mut = bch2_inode_to_v3(trans, k_mut); + ret = PTR_ERR_OR_ZERO(k_mut); if (unlikely(ret)) goto err; } - inode = bkey_i_to_inode_v3(k); + struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut); if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && new_i_size > le64_to_cpu(inode->v.bi_size)) { From feb077c1774e559cddfc41ef26c864780d158fe2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 May 2024 17:13:21 -0400 Subject: [PATCH 15/30] bcachefs: Fix refcount put in sb_field_resize error path Signed-off-by: Kent Overstreet --- 
fs/bcachefs/super-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 08ea3dbbbe97..e7527d551e3c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -232,7 +232,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, struct bch_sb_handle *dev_sb = &ca->disk_sb; if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { - percpu_ref_put(&ca->ref); + percpu_ref_put(&ca->io_ref); return NULL; } } From 1267df40acb2da62b1641abae26132411d093fb3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:29:46 -0400 Subject: [PATCH 16/30] bcachefs: Initialize bch_write_op->failed in inline data path Normally this is initialized in __bch2_write(), which is executed in a loop, but the inline data path skips this. Reported-by: syzbot+fd3ccb331eb21f05d13b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index b72cf31f7274..40d7df7607df 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1513,6 +1513,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) unsigned sectors; int ret; + memset(&op->failed, 0, sizeof(op->failed)); + op->flags |= BCH_WRITE_WROTE_DATA_INLINE; op->flags |= BCH_WRITE_DONE; From 3a2d0259274202432e5119463dd4cf5f9fabed98 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:51:49 -0400 Subject: [PATCH 17/30] bcachefs: Fix bch2_dev_lookup() refcounting bch2_dev_lookup() is supposed to take a ref on the device it returns, but for_each_member_device() takes refs as it iterates, for_each_member_device_rcu() does not. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 88e214c609bb..c2c80e6890ae 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -2004,13 +2004,9 @@ err: /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { - rcu_read_lock(); - for_each_member_device_rcu(c, ca, NULL) - if (!strcmp(name, ca->name)) { - rcu_read_unlock(); + for_each_member_device(c, ca) + if (!strcmp(name, ca->name)) return ca; - } - rcu_read_unlock(); return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); } From 18b4abcead744322feb90ba31450e7d770e928bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 12:55:44 -0400 Subject: [PATCH 18/30] bcachefs: Fix lifetime issue in device iterator helpers bch2_get_next_dev() and bch2_get_next_online_dev() iterate over devices, dropping and taking refs as they go; we can't access the previous device (for ca->dev_idx) after we've dropped our ref to it, unless we take rcu_read_lock() first. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 5efa64eca5f8..5bf27d30ca29 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -107,10 +107,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev * static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca) { + rcu_read_lock(); if (ca) percpu_ref_put(&ca->ref); - rcu_read_lock(); if ((ca = __bch2_next_dev(c, ca, NULL))) percpu_ref_get(&ca->ref); rcu_read_unlock(); @@ -132,10 +132,10 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, struct bch_dev *ca, unsigned state_mask) { + rcu_read_lock(); if (ca) percpu_ref_put(&ca->io_ref); - rcu_read_lock(); while ((ca = __bch2_next_dev(c, ca, NULL)) && (!((1 << ca->mi.state) & state_mask) || !percpu_ref_tryget(&ca->io_ref))) From db42549d402cb44fe67c95d08f1a9ea902d32e7e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 May 2024 13:26:37 -0400 Subject: [PATCH 19/30] bcachefs: Add a better limit for maximum number of buckets The bucket_gens array is a single array allocation (one byte per bucket), and kernel allocations are still limited to INT_MAX. Check this limit to avoid failing the bucket_gens array allocation. 
Reported-by: syzbot+b29f436493184ea42e2b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 6 ++++++ fs/bcachefs/errcode.h | 1 + fs/bcachefs/sb-members.c | 6 +++--- fs/bcachefs/super.c | 7 +++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index f7fbfccd2b1e..8345a2b2d05b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -591,6 +591,12 @@ struct bch_member { __le64 btree_allocated_bitmap; }; +/* + * This limit comes from the bucket_gens array - it's a single allocation, and + * kernel allocation are limited to INT_MAX + */ +#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64) + #define BCH_MEMBER_V1_BYTES 56 LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 01a79fa3eacb..dbe35b80bc0b 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -175,6 +175,7 @@ x(EINVAL, block_size_too_small) \ x(EINVAL, bucket_size_too_small) \ x(EINVAL, device_size_too_small) \ + x(EINVAL, device_size_too_big) \ x(EINVAL, device_not_a_member_of_filesystem) \ x(EINVAL, device_has_been_removed) \ x(EINVAL, device_splitbrain) \ diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 5b8e621ac5eb..44b3f0cb7b49 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -124,9 +124,9 @@ static int validate_member(struct printbuf *err, struct bch_sb *sb, int i) { - if (le64_to_cpu(m.nbuckets) > LONG_MAX) { - prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", - i, le64_to_cpu(m.nbuckets), LONG_MAX); + if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) { + prt_printf(err, "device %u: too many buckets (got %llu, max %u)", + i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX); return -BCH_ERR_invalid_sb_members; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index c2c80e6890ae..dddf57ec4511 100644 --- 
a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1959,6 +1959,13 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) goto err; } + if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { + bch_err(ca, "New device size too big (%llu greater than max %u)", + nbuckets, BCH_MEMBER_NBUCKETS_MAX); + ret = -BCH_ERR_device_size_too_big; + goto err; + } + if (bch2_dev_is_online(ca) && get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { From 9a0ec045110dbaad4b8d609142b534f913354101 Mon Sep 17 00:00:00 2001 From: Reed Riley Date: Sat, 4 May 2024 22:12:23 +0000 Subject: [PATCH 20/30] bcachefs: fix overflow in fiemap filefrag (and potentially other utilities that call fiemap) sometimes pass ULONG_MAX as the length. fiemap_prep clamps excessively large lengths - but the calculation of end can overflow if it occurs before calling fiemap_prep. When this happens, filefrag assumes it has read to the end and exits. Signed-off-by: Reed Riley Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fce690007edf..6f114803c6f2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -964,7 +964,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; u32 snapshot; @@ -974,6 +973,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (ret) return ret; + struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); if (start + len < start) return -EINVAL; From 6b8cbfc3db7582d6f26c6b757d8e949174641709 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:02:28 -0400 Subject: [PATCH 21/30] bcachefs: Fix assert in bch2_alloc_v4_invalid() Reported-by: 
syzbot+10827fa6b176e1acf1d0@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 4 ++-- fs/bcachefs/alloc_background.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 4ff56fa4d539..534ba2b02bd6 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -244,10 +244,10 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 052b2fac25d6..2790e516383d 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -126,13 +126,17 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_ return pos; } -static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) +static inline unsigned alloc_v4_u64s_noerror(const struct bch_alloc_v4 *a) { - unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + return (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: BCH_ALLOC_V4_U64s_V0) + BCH_ALLOC_V4_NR_BACKPOINTERS(a) * (sizeof(struct bch_backpointer) / sizeof(u64)); +} +static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) +{ + unsigned ret = alloc_v4_u64s_noerror(a); BUG_ON(ret > U8_MAX - BKEY_U64s); return ret; } From f39055220f6f98a180e3503fe05bbf9921c425c8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:28:00 -0400 Subject: [PATCH 22/30] bcachefs: Add missing validation for superblock section clean 
We were forgetting to check for jset entries that overrun the end of the section - both in validate and to_text(); to_text() needs to be safe for types that fail to validate. Reported-by: syzbot+c48865e11e7e893ec4ab@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-clean.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 35ca3f138de6..194e55b11137 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -278,6 +278,17 @@ static int bch2_sb_clean_validate(struct bch_sb *sb, return -BCH_ERR_invalid_sb_clean; } + for (struct jset_entry *entry = clean->start; + entry != vstruct_end(&clean->field); + entry = vstruct_next(entry)) { + if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) { + prt_str(err, "entry type "); + bch2_prt_jset_entry_type(err, le16_to_cpu(entry->type)); + prt_str(err, " overruns end of section"); + return -BCH_ERR_invalid_sb_clean; + } + } + return 0; } @@ -295,6 +306,9 @@ static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb, for (entry = clean->start; entry != vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) + break; + if (entry->type == BCH_JSET_ENTRY_btree_keys && !entry->u64s) continue; From 2bb9600d5d4735953c47dd1ee99382c68dd04caa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:33:05 -0400 Subject: [PATCH 23/30] bcachefs: Guard against unknown k.k->type in __bkey_invalid() For forwards compatibility we have to allow unknown key types, and only run the checks that make sense against them. Fix a missing guard on k.k->type being known. 
Reported-by: syzbot+ae4dc916da3ce51f284f@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index db336a43fc08..a275a9e8e341 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -171,8 +171,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on((type == BKEY_TYPE_btree || - (flags & BKEY_INVALID_COMMIT)) && + bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && + (type == BKEY_TYPE_btree || (flags & BKEY_INVALID_COMMIT)) && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", From 0ec5b3b7ccfcdca02ab322abf86455d0050ae98f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:44:27 -0400 Subject: [PATCH 24/30] bcachefs: Fix shift-by-64 in bformat_needs_redo() Ancient versions of bcachefs produced packed formats that could represent keys that our in memory format cannot represent; bformat_needs_redo() has some tricky shifts to check for this sort of overflow. Reported-by: syzbot+594427aebfefeebe91c6@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index bf68ea49447b..4d94b7742dbb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -968,24 +968,30 @@ static bool migrate_btree_pred(struct bch_fs *c, void *arg, return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); } +/* + * Ancient versions of bcachefs produced packed formats which could represent + * keys that the in memory format cannot represent; this checks for those + * formats so we can get rid of them. 
+ */ static bool bformat_needs_redo(struct bkey_format *f) { - unsigned i; - - for (i = 0; i < f->nr_fields; i++) { + for (unsigned i = 0; i < f->nr_fields; i++) { + unsigned f_bits = f->bits_per_field[i]; unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1)); u64 field_offset = le64_to_cpu(f->field_offset[i]); - if (f->bits_per_field[i] > unpacked_bits) + if (f_bits > unpacked_bits) return true; - if ((f->bits_per_field[i] == unpacked_bits) && field_offset) + if ((f_bits == unpacked_bits) && field_offset) return true; - if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) & - unpacked_mask) < - field_offset) + u64 f_mask = f_bits + ? ~((~0ULL << (f_bits - 1)) << 1) + : 0; + + if (((field_offset + f_mask) & unpacked_mask) < field_offset) return true; } From 8060bf1d83f7d404bacb0e5a38f2d4d8f4c9dfb7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 May 2024 22:56:54 -0400 Subject: [PATCH 25/30] bcachefs: Fix snapshot_t() usage in bch2_fs_quota_read_inode() bch2_fs_quota_read_inode() wasn't entirely updated to the bch2_snapshot_tree() helper, which takes rcu lock. 
Reported-by: syzbot+a3a9a61224ed3b7f0010@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/quota.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index e68b34eab90a..556da0738106 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -560,13 +560,11 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_inode_unpacked u; struct bch_snapshot_tree s_t; - int ret; + u32 tree = bch2_snapshot_tree(c, k.k->p.snapshot); - ret = bch2_snapshot_tree_lookup(trans, - bch2_snapshot_tree(c, k.k->p.snapshot), &s_t); + int ret = bch2_snapshot_tree_lookup(trans, tree, &s_t); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "%s: snapshot tree %u not found", __func__, - snapshot_t(c, k.k->p.snapshot)->tree); + "%s: snapshot tree %u not found", __func__, tree); if (ret) return ret; From 88ab10186c44d0a8b90842beab8648b5fd14432d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 08:40:46 -0400 Subject: [PATCH 26/30] bcachefs: Add missing skcipher_request_set_callback() call Signed-off-by: Kent Overstreet --- fs/bcachefs/checksum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 7ed779b411f6..088fd2e7bdf1 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -102,6 +102,7 @@ static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, int ret; skcipher_request_set_sync_tfm(req, tfm); + skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, len, nonce.d); ret = crypto_skcipher_encrypt(req); From 71dac2482ad3c8d4a8b8998a96751f009bad895f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 09:10:29 -0400 Subject: [PATCH 27/30] bcachefs: BCH_SB_LAYOUT_SIZE_BITS_MAX Define a constant for the max superblock size, to avoid a too-large shift. 
Reported-by: syzbot+a8b0fb419355c91dda7f@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 ++ fs/bcachefs/super-io.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 8345a2b2d05b..2e8b1a489c20 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -903,6 +903,8 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re #define BCH_SB_SECTOR 8 #define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ +#define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */ + struct bch_sb_layout { __uuid_t magic; /* bcachefs superblock UUID */ __u8 layout_type; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e7527d551e3c..989d16bba8f0 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -649,7 +649,7 @@ reread: bytes = vstruct_bytes(sb->sb); - if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { + if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) { prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", bytes, 512UL << sb->sb->layout.sb_max_size_bits); return -BCH_ERR_invalid_sb_too_big; From 54541c1f78e12a78487ae63e2e199a7d4f6dbd26 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 20:49:24 -0400 Subject: [PATCH 28/30] bcachefs: Fix race in bch2_write_super() bch2_write_super() was looping over online devices multiple times - dropping and retaking io_ref each time. This meant it could race with device removal; it could increment the sequence number on a device but fail to write it - and then if the device was re-added, it would get confused the next time around thinking a superblock write was silently dropped. Fix this by taking io_ref once, and stashing pointers to online devices in a darray. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 47 ++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 989d16bba8f0..bfdb15e7d778 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -923,6 +923,7 @@ int bch2_write_super(struct bch_fs *c) struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; + DARRAY(struct bch_dev *) online_devices = {}; int ret = 0; trace_and_count(c, write_super, c, _RET_IP_); @@ -935,6 +936,15 @@ int bch2_write_super(struct bch_fs *c) closure_init_stack(cl); memset(&sb_written, 0, sizeof(sb_written)); + for_each_online_member(c, ca) { + ret = darray_push(&online_devices, ca); + if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) { + percpu_ref_put(&ca->io_ref); + goto out; + } + percpu_ref_get(&ca->io_ref); + } + /* Make sure we're using the new magic numbers: */ c->disk_sb.sb->magic = BCHFS_MAGIC; c->disk_sb.sb->layout.magic = BCHFS_MAGIC; @@ -942,8 +952,8 @@ int bch2_write_super(struct bch_fs *c) le64_add_cpu(&c->disk_sb.sb->seq, 1); struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); - for_each_online_member(c, ca) - __bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq; + darray_for_each(online_devices, ca) + __bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq; c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds()); if (test_bit(BCH_FS_error, &c->flags)) @@ -959,16 +969,15 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_errors_from_cpu(c); bch2_sb_downgrade_update(c); - for_each_online_member(c, ca) - bch2_sb_from_fs(c, ca); + darray_for_each(online_devices, ca) + bch2_sb_from_fs(c, (*ca)); - for_each_online_member(c, ca) { + darray_for_each(online_devices, ca) { printbuf_reset(&err); - ret = 
bch2_sb_validate(&ca->disk_sb, &err, WRITE); + ret = bch2_sb_validate(&(*ca)->disk_sb, &err, WRITE); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); - percpu_ref_put(&ca->io_ref); goto out; } } @@ -995,16 +1004,18 @@ int bch2_write_super(struct bch_fs *c) return -BCH_ERR_sb_not_downgraded; } - for_each_online_member(c, ca) { - __set_bit(ca->dev_idx, sb_written.d); - ca->sb_write_error = 0; + darray_for_each(online_devices, ca) { + __set_bit((*ca)->dev_idx, sb_written.d); + (*ca)->sb_write_error = 0; } - for_each_online_member(c, ca) - read_back_super(c, ca); + darray_for_each(online_devices, ca) + read_back_super(c, *ca); closure_sync(cl); - for_each_online_member(c, ca) { + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; + if (ca->sb_write_error) continue; @@ -1031,17 +1042,20 @@ int bch2_write_super(struct bch_fs *c) do { wrote = false; - for_each_online_member(c, ca) + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; if (!ca->sb_write_error && sb < ca->disk_sb.sb->layout.nr_superblocks) { write_one_super(c, ca, sb); wrote = true; } + } closure_sync(cl); sb++; } while (wrote); - for_each_online_member(c, ca) { + darray_for_each(online_devices, cap) { + struct bch_dev *ca = *cap; if (ca->sb_write_error) __clear_bit(ca->dev_idx, sb_written.d); else @@ -1077,6 +1091,9 @@ int bch2_write_super(struct bch_fs *c) out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + darray_for_each(online_devices, ca) + percpu_ref_put(&(*ca)->io_ref); + darray_exit(&online_devices); printbuf_exit(&err); return ret; } From 6e297a73bccf852e7716207caa8eb868737c7155 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 6 May 2024 23:11:43 -0400 Subject: [PATCH 29/30] bcachefs: Add missing sched_annotate_sleep() in bch2_journal_flush_seq_async() Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/bcachefs/journal.c 
b/fs/bcachefs/journal.c index 9c2af544251a..a8b08e76d0d0 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -706,6 +706,12 @@ recheck_need_open: spin_unlock(&j->lock); + /* + * We're called from bch2_journal_flush_seq() -> wait_event(); + * but this might block. We won't usually block, so we won't + * livelock: + */ + sched_annotate_sleep(); ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); if (ret) return ret; From 96d88f65adfbcaca153afd7d3e20d74ba379c599 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 25 Apr 2024 06:44:00 -0400 Subject: [PATCH 30/30] virtiofs: include a newline in sysfs tag The internal tag string doesn't contain a newline. Append one when emitting the tag via sysfs. [Stefan] Orthogonal to the newline issue, sysfs_emit(buf, "%s", fs->tag) is needed to prevent format string injection. Signed-off-by: Brian Foster Fixes: a8f62f50b4e4 ("virtiofs: export filesystem tags through sysfs") Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 322af827a232..bb3e941b9503 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -170,7 +170,7 @@ static ssize_t tag_show(struct kobject *kobj, { struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); - return sysfs_emit(buf, fs->tag); + return sysfs_emit(buf, "%s\n", fs->tag); } static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);