From 3318ae18bbe039615a077378e3736623852d5757 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 12 Apr 2023 16:03:03 -0700 Subject: [PATCH] ANDROID: Revert "mm: remove cleancache" This reverts commit 0a4ee518185e902758191d968600399f3bc2be31. Conflicts: Documentation/mm/cleancache.rst Documentation/vm/index.rst arch/arm/configs/bcm2835_defconfig arch/arm/configs/qcom_defconfig arch/m68k/configs/amiga_defconfig arch/m68k/configs/apollo_defconfig arch/m68k/configs/atari_defconfig arch/m68k/configs/bvme6000_defconfig arch/m68k/configs/hp300_defconfig arch/m68k/configs/mac_defconfig arch/m68k/configs/multi_defconfig arch/m68k/configs/mvme147_defconfig arch/m68k/configs/mvme16x_defconfig arch/m68k/configs/q40_defconfig arch/m68k/configs/sun3_defconfig arch/m68k/configs/sun3x_defconfig arch/s390/configs/debug_defconfig arch/s390/configs/defconfig fs/f2fs/data.c fs/mpage.c block/bdev.c fs/btrfs/extent_io.c fs/super.c block/bdev.c include/linux/fs.h mm/truncate.c 1. Skip documentation which was refactored. 2. Skip defconfigs unused in Android. 3. Replaced deprecated __submit_bio() with f2fs_submit_read_bio() 4. Replaced PageUptodate() with folio_test_uptodate() 5. Replaced SetPageUptodate() with folio_mark_uptodate() 6. Changed cleancache_get_page() call to use folio->page 7. Changed ext4_mpage_readpages() to use folio instead of page 8. Changed f2fs_read_single_page() to use folio instead of page 9. 
Changed btrfs_do_readpage() to use folio instead of page Bug: 271544708 Bug: 323283126 Bug: 270089503 Change-Id: I93359509f7799de72f31b002a2539565d1bda9d6 Signed-off-by: Suren Baghdasaryan [ quic_pbrahma@quicinc.com: Resolved merge conflicts and modified apis to use folio instead of page ] Signed-off-by: Pratyush Brahma --- MAINTAINERS | 7 + block/bdev.c | 5 + fs/btrfs/extent_io.c | 13 +- fs/btrfs/super.c | 2 + fs/ext4/readpage.c | 6 + fs/ext4/super.c | 3 + fs/f2fs/data.c | 14 ++ fs/mpage.c | 7 + fs/ntfs3/ntfs_fs.h | 1 + fs/ocfs2/super.c | 2 + fs/super.c | 3 + include/linux/cleancache.h | 124 +++++++++++++++ include/linux/fs.h | 5 + mm/Kconfig | 22 +++ mm/Makefile | 1 + mm/cleancache.c | 315 +++++++++++++++++++++++++++++++++++++ mm/filemap.c | 11 ++ mm/truncate.c | 15 +- 18 files changed, 553 insertions(+), 3 deletions(-) create mode 100644 include/linux/cleancache.h create mode 100644 mm/cleancache.c diff --git a/MAINTAINERS b/MAINTAINERS index f8b203f02e62..64be0770bc4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5548,6 +5548,13 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b +CLEANCACHE API +M: Konrad Rzeszutek Wilk +L: linux-kernel@vger.kernel.org +S: Maintained +F: include/linux/cleancache.h +F: mm/cleancache.c + CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/block/bdev.c b/block/bdev.c index 738e3c8457e7..134e3db98bc8 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,10 @@ void invalidate_bdev(struct block_device *bdev) lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); } + /* 99% of the time, we don't need to flush the cleancache on the bdev. 
+ * But, for the strange corners, lets be cautious + */ + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 872cca54cc6c..2355cf682b1b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent-io-tree.h" @@ -956,6 +957,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, size_t pg_offset = 0; size_t iosize; size_t blocksize = fs_info->sectorsize; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; ret = set_folio_extent_mapped(folio); if (ret < 0) { @@ -963,6 +965,15 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, return ret; } + if (!folio_test_uptodate(folio)) { + if (cleancache_get_page(&folio->page) == 0) { + BUG_ON(blocksize != folio_size(folio)); + unlock_extent(tree, start, end, NULL); + folio_unlock(folio); + goto out; + } + } + if (folio->index == last_byte >> folio_shift(folio)) { size_t zero_offset = offset_in_folio(folio, last_byte); @@ -1081,7 +1092,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, cur = cur + iosize; pg_offset += iosize; } - +out: return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c64d07134122..644039e45d21 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -990,6 +991,7 @@ static int btrfs_fill_super(struct super_block *sb, goto fail_close; } + cleancache_init_fs(sb); sb->s_flags |= SB_ACTIVE; return 0; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 5d3a9dc9a32d..f9a15a79c1b2 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ext4.h" @@ -334,6 +335,11 @@ int ext4_mpage_readpages(struct inode *inode, } else if (fully_mapped) { 
folio_set_mappedtodisk(folio); } + if (fully_mapped && blocks_per_page == 1 && + !folio_test_uptodate(folio) && cleancache_get_page(&folio->page) == 0) { + folio_mark_uptodate(folio); + goto confused; + } /* * This folio will go to BIO. Do we need to send this diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 16a4ce704460..3154157603e9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -3105,6 +3106,8 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); + + cleancache_init_fs(sb); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a41350821099..fc944e8a30ed 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2134,6 +2135,12 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; folio_set_mappedtodisk(folio); + if (!folio_test_uptodate(folio) && (!folio_test_swapcache(folio) && + !cleancache_get_page(&folio->page))) { + folio_mark_uptodate(folio); + goto confused; + } + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; @@ -2188,6 +2195,13 @@ submit_and_realloc: f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); *last_block_in_bio = block_nr; + goto out; +confused: + if (bio) { + f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + folio_unlock(folio); out: *bio_ret = bio; return ret; diff --git a/fs/mpage.c b/fs/mpage.c index b5b5ddf9d513..bae6050022d6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "internal.h" /* @@ -279,6 +280,12 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) folio_set_mappedtodisk(folio); } + if (fully_mapped && blocks_per_page == 1 && !folio_test_uptodate(folio) && + cleancache_get_page(&folio->page) == 0) { + 
folio_mark_uptodate(folio); + goto confused; + } + /* * This folio will go to BIO. Do we need to send this BIO off first? */ diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 26e1e1379c04..f237baad6702 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c79b4291777f..7ff326a6c8fc 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -2274,6 +2275,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto out_system_inodes; } + cleancache_init_shared_fs(sb); osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM); if (!osb->ocfs2_wq) { diff --git a/fs/super.c b/fs/super.c index c9c7223bc2a2..6a68a14b0313 100644 --- a/fs/super.c +++ b/fs/super.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -374,6 +375,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_gran = 1000000000; s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; + s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "sb-%s", type->name); @@ -469,6 +471,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { + cleancache_invalidate_fs(s); shrinker_free(s->s_shrink); fs->kill_sb(s); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100644 index 000000000000..5f5730c1d324 --- /dev/null +++ b/include/linux/cleancache.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLEANCACHE_H +#define _LINUX_CLEANCACHE_H + +#include +#include +#include + +#define CLEANCACHE_NO_POOL -1 +#define CLEANCACHE_NO_BACKEND -2 +#define CLEANCACHE_NO_BACKEND_SHARED -3 + 
+#define CLEANCACHE_KEY_MAX 6 + +/* + * cleancache requires every file with a page in cleancache to have a + * unique key unless/until the file is removed/truncated. For some + * filesystems, the inode number is unique, but for "modern" filesystems + * an exportable filehandle is required (see exportfs.h) + */ +struct cleancache_filekey { + union { + ino_t ino; + __u32 fh[CLEANCACHE_KEY_MAX]; + u32 key[CLEANCACHE_KEY_MAX]; + } u; +}; + +struct cleancache_ops { + int (*init_fs)(size_t); + int (*init_shared_fs)(uuid_t *uuid, size_t); + int (*get_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*put_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t); + void (*invalidate_inode)(int, struct cleancache_filekey); + void (*invalidate_fs)(int); +}; + +extern int cleancache_register_ops(const struct cleancache_ops *ops); +extern void __cleancache_init_fs(struct super_block *); +extern void __cleancache_init_shared_fs(struct super_block *); +extern int __cleancache_get_page(struct page *); +extern void __cleancache_put_page(struct page *); +extern void __cleancache_invalidate_page(struct address_space *, struct page *); +extern void __cleancache_invalidate_inode(struct address_space *); +extern void __cleancache_invalidate_fs(struct super_block *); + +#ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) +static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) +{ + return mapping->host->i_sb->cleancache_poolid >= 0; +} +static inline bool cleancache_fs_enabled(struct page *page) +{ + return cleancache_fs_enabled_mapping(page->mapping); +} +#else +#define cleancache_enabled (0) +#define cleancache_fs_enabled(_page) (0) +#define cleancache_fs_enabled_mapping(_page) (0) +#endif + +/* + * The shim layer provided by these inline functions allows the compiler + * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE + * is disabled, to a 
single global variable check if CONFIG_CLEANCACHE + * is enabled but no cleancache "backend" has dynamically enabled it, + * and, for the most frequent cleancache ops, to a single global variable + * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled + * and a cleancache backend has dynamically enabled cleancache, but the + * filesystem referenced by that cleancache op has not enabled cleancache. + * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially + * no measurable performance impact. + */ + +static inline void cleancache_init_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_fs(sb); +} + +static inline void cleancache_init_shared_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_shared_fs(sb); +} + +static inline int cleancache_get_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + return __cleancache_get_page(page); + return -1; +} + +static inline void cleancache_put_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + __cleancache_put_page(page); +} + +static inline void cleancache_invalidate_page(struct address_space *mapping, + struct page *page) +{ + /* careful... 
page->mapping is NULL sometimes when this is called */ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_invalidate_page(mapping, page); +} + +static inline void cleancache_invalidate_inode(struct address_space *mapping) +{ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_invalidate_inode(mapping); +} + +static inline void cleancache_invalidate_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_invalidate_fs(sb); +} + +#endif /* _LINUX_CLEANCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3559446279c1..661f0034c433 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1345,6 +1345,11 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ diff --git a/mm/Kconfig b/mm/Kconfig index 520e12b99148..6e187ffd607d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -929,6 +929,28 @@ config USE_PERCPU_NUMA_NODE_ID config HAVE_SETUP_PER_CPU_AREA bool +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancache code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. 
And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. + + If unsure, say Y to enable cleancache + config CMA bool "Contiguous Memory Allocator" depends on MMU diff --git a/mm/Makefile b/mm/Makefile index d5639b036166..90818d75432d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..db7eee9c0886 --- /dev/null +++ b/mm/cleancache.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Cleancache frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of cleancache. See + * Documentation/vm/cleancache.rst for more information. + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + */ + +#include +#include +#include +#include +#include +#include + +/* + * cleancache_ops is set by cleancache_register_ops to contain the pointers + * to the cleancache "backend" implementation functions. + */ +static const struct cleancache_ops *cleancache_ops __read_mostly; + +/* + * Counters available via /sys/kernel/debug/cleancache (if debugfs is + * properly configured. 
These are for information only so are not protected + * against increment races. + */ +static u64 cleancache_succ_gets; +static u64 cleancache_failed_gets; +static u64 cleancache_puts; +static u64 cleancache_invalidates; + +static void cleancache_register_ops_sb(struct super_block *sb, void *unused) +{ + switch (sb->cleancache_poolid) { + case CLEANCACHE_NO_BACKEND: + __cleancache_init_fs(sb); + break; + case CLEANCACHE_NO_BACKEND_SHARED: + __cleancache_init_shared_fs(sb); + break; + } +} + +/* + * Register operations for cleancache. Returns 0 on success. + */ +int cleancache_register_ops(const struct cleancache_ops *ops) +{ + if (cmpxchg(&cleancache_ops, NULL, ops)) + return -EBUSY; + + /* + * A cleancache backend can be built as a module and hence loaded after + * a cleancache enabled filesystem has called cleancache_init_fs. To + * handle such a scenario, here we call ->init_fs or ->init_shared_fs + * for each active super block. To differentiate between local and + * shared filesystems, we temporarily initialize sb->cleancache_poolid + * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED + * respectively in case there is no backend registered at the time + * cleancache_init_fs or cleancache_init_shared_fs is called. + * + * Since filesystems can be mounted concurrently with cleancache + * backend registration, we have to be careful to guarantee that all + * cleancache enabled filesystems that has been mounted by the time + * cleancache_register_ops is called has got and all mounted later will + * get cleancache_poolid. 
This is assured by the following statements + * tied together: + * + * a) iterate_supers skips only those super blocks that have started + * ->kill_sb + * + * b) if iterate_supers encounters a super block that has not finished + * ->mount yet, it waits until it is finished + * + * c) cleancache_init_fs is called from ->mount and + * cleancache_invalidate_fs is called from ->kill_sb + * + * d) we call iterate_supers after cleancache_ops has been set + * + * From a) it follows that if iterate_supers skips a super block, then + * either the super block is already dead, in which case we do not need + * to bother initializing cleancache for it, or it was mounted after we + * initiated iterate_supers. In the latter case, it must have seen + * cleancache_ops set according to d) and initialized cleancache from + * ->mount by itself according to c). This proves that we call + * ->init_fs at least once for each active super block. + * + * From b) and c) it follows that if iterate_supers encounters a super + * block that has already started ->init_fs, it will wait until ->mount + * and hence ->init_fs have finished, then check cleancache_poolid, see + * that it has already been set and therefore do nothing. This proves + * that we call ->init_fs no more than once for each super block. + * + * Combined together, the last two paragraphs prove the function's + * correctness. + * + * Note that various cleancache callbacks may proceed before this + * function is called or even concurrently with it, but since + * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop + * until the corresponding ->init_fs has actually been called and + * cleancache_ops has been set. 
+ */ + iterate_supers(cleancache_register_ops_sb, NULL); + return 0; +} +EXPORT_SYMBOL(cleancache_register_ops); + +/* Called by a cleancache-enabled filesystem at time of mount */ +void __cleancache_init_fs(struct super_block *sb) +{ + int pool_id = CLEANCACHE_NO_BACKEND; + + if (cleancache_ops) { + pool_id = cleancache_ops->init_fs(PAGE_SIZE); + if (pool_id < 0) + pool_id = CLEANCACHE_NO_POOL; + } + sb->cleancache_poolid = pool_id; +} +EXPORT_SYMBOL(__cleancache_init_fs); + +/* Called by a cleancache-enabled clustered filesystem at time of mount */ +void __cleancache_init_shared_fs(struct super_block *sb) +{ + int pool_id = CLEANCACHE_NO_BACKEND_SHARED; + + if (cleancache_ops) { + pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE); + if (pool_id < 0) + pool_id = CLEANCACHE_NO_POOL; + } + sb->cleancache_poolid = pool_id; +} +EXPORT_SYMBOL(__cleancache_init_shared_fs); + +/* + * If the filesystem uses exportable filehandles, use the filehandle as + * the key, else use the inode number. + */ +static int cleancache_get_key(struct inode *inode, + struct cleancache_filekey *key) +{ + int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); + int len = 0, maxlen = CLEANCACHE_KEY_MAX; + struct super_block *sb = inode->i_sb; + + key->u.ino = inode->i_ino; + if (sb->s_export_op != NULL) { + fhfn = sb->s_export_op->encode_fh; + if (fhfn) { + len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); + if (len <= FILEID_ROOT || len == FILEID_INVALID) + return -1; + if (maxlen > CLEANCACHE_KEY_MAX) + return -1; + } + } + return 0; +} + +/* + * "Get" data from cleancache associated with the poolid/inode/index + * that were specified when the data was put to cleanache and, if + * successful, use it to fill the specified page with data and return 0. + * The pageframe is unchanged and returns -1 if the get fails. + * Page must be locked by caller. 
+ * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +int __cleancache_get_page(struct page *page) +{ + int ret = -1; + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) { + cleancache_failed_gets++; + goto out; + } + + VM_BUG_ON_PAGE(!PageLocked(page), page); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id < 0) + goto out; + + if (cleancache_get_key(page->mapping->host, &key) < 0) + goto out; + + ret = cleancache_ops->get_page(pool_id, key, page->index, page); + if (ret == 0) + cleancache_succ_gets++; + else + cleancache_failed_gets++; +out: + return ret; +} +EXPORT_SYMBOL(__cleancache_get_page); + +/* + * "Put" data from a page to cleancache and associate it with the + * (previously-obtained per-filesystem) poolid and the page's, + * inode and page index. Page must be locked. Note that a put_page + * always "succeeds", though a subsequent get_page may succeed or fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_put_page(struct page *page) +{ + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) { + cleancache_puts++; + return; + } + + VM_BUG_ON_PAGE(!PageLocked(page), page); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id >= 0 && + cleancache_get_key(page->mapping->host, &key) >= 0) { + cleancache_ops->put_page(pool_id, key, page->index, page); + cleancache_puts++; + } +} +EXPORT_SYMBOL(__cleancache_put_page); + +/* + * Invalidate any data from cleancache associated with the poolid and the + * page's inode and page index so that a subsequent "get" will fail. 
+ * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_invalidate_page(struct address_space *mapping, + struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) + return; + + if (pool_id >= 0) { + VM_BUG_ON_PAGE(!PageLocked(page), page); + if (cleancache_get_key(mapping->host, &key) >= 0) { + cleancache_ops->invalidate_page(pool_id, + key, page->index); + cleancache_invalidates++; + } + } +} +EXPORT_SYMBOL(__cleancache_invalidate_page); + +/* + * Invalidate all data from cleancache associated with the poolid and the + * mappings's inode so that all subsequent gets to this poolid/inode + * will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_invalidate_inode(struct address_space *mapping) +{ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) + return; + + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) + cleancache_ops->invalidate_inode(pool_id, key); +} +EXPORT_SYMBOL(__cleancache_invalidate_inode); + +/* + * Called by any cleancache-enabled filesystem at time of unmount; + * note that pool_id is surrendered and may be returned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs. 
+ */ +void __cleancache_invalidate_fs(struct super_block *sb) +{ + int pool_id; + + pool_id = sb->cleancache_poolid; + sb->cleancache_poolid = CLEANCACHE_NO_POOL; + + if (cleancache_ops && pool_id >= 0) + cleancache_ops->invalidate_fs(pool_id); +} +EXPORT_SYMBOL(__cleancache_invalidate_fs); + +static int __init init_cleancache(void) +{ +#ifdef CONFIG_DEBUG_FS + struct dentry *root = debugfs_create_dir("cleancache", NULL); + + debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets); + debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets); + debugfs_create_u64("puts", 0444, root, &cleancache_puts); + debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates); +#endif + return 0; +} +module_init(init_cleancache) diff --git a/mm/filemap.c b/mm/filemap.c index 56fa431c52af..d7e89f65ea9c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -160,6 +161,16 @@ static void filemap_unaccount_folio(struct address_space *mapping, { long nr; + /* + * if we're uptodate, flush out into the cleancache, otherwise + * invalidate any existing cleancache entries. 
We can't leave + * stale data around in the cleancache once our page is gone + */ + if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) + cleancache_put_page(&folio->page); + else + cleancache_invalidate_page(mapping, &folio->page); + VM_BUG_ON_FOLIO(folio_mapped(folio), folio); if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", diff --git a/mm/truncate.c b/mm/truncate.c index 0668cd340a46..12f9e3219acd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -219,6 +220,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!mapping_inaccessible(folio->mapping)) folio_zero_range(folio, offset, length); + cleancache_invalidate_page(folio->mapping, &folio->page); if (folio_needs_release(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) @@ -314,7 +316,7 @@ void truncate_inode_pages_range(struct address_space *mapping, bool same_folio; if (mapping_empty(mapping)) - return; + goto out; /* * 'start' and 'end' always covers the range of pages to be fully @@ -402,6 +404,9 @@ void truncate_inode_pages_range(struct address_space *mapping, truncate_folio_batch_exceptionals(mapping, &fbatch, indices); folio_batch_release(&fbatch); } + +out: + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -455,6 +460,10 @@ void truncate_inode_pages_final(struct address_space *mapping) xa_unlock_irq(&mapping->i_pages); } + /* + * Cleancache needs notification even if there are no pages or shadow + * entries. 
+ */ truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); @@ -608,7 +617,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, bool xa_has_values = false; if (mapping_empty(mapping)) - return 0; + goto out; folio_batch_init(&fbatch); index = start; @@ -675,6 +684,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } +out: + cleancache_invalidate_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);