From 3318ae18bbe039615a077378e3736623852d5757 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 12 Apr 2023 16:03:03 -0700
Subject: [PATCH] ANDROID: Revert "mm: remove cleancache"

This reverts commit 0a4ee518185e902758191d968600399f3bc2be31.

Conflicts:
	Documentation/mm/cleancache.rst
	Documentation/vm/index.rst
	arch/arm/configs/bcm2835_defconfig
	arch/arm/configs/qcom_defconfig
	arch/m68k/configs/amiga_defconfig
	arch/m68k/configs/apollo_defconfig
	arch/m68k/configs/atari_defconfig
	arch/m68k/configs/bvme6000_defconfig
	arch/m68k/configs/hp300_defconfig
	arch/m68k/configs/mac_defconfig
	arch/m68k/configs/multi_defconfig
	arch/m68k/configs/mvme147_defconfig
	arch/m68k/configs/mvme16x_defconfig
	arch/m68k/configs/q40_defconfig
	arch/m68k/configs/sun3_defconfig
	arch/m68k/configs/sun3x_defconfig
	arch/s390/configs/debug_defconfig
	arch/s390/configs/defconfig
	fs/f2fs/data.c
	fs/mpage.c
	block/bdev.c
	fs/btrfs/extent_io.c
	fs/super.c
	block/bdev.c
	include/linux/fs.h
	mm/truncate.c

1. Skip documentation which was refactored.
2. Skip defconfigs unused in Android.
3. Replaced deprecated __submit_bio() with f2fs_submit_read_bio()
4. Replaced PageUptodate() with folio_test_uptodate()
5. Replaced SetPageUptodate() with folio_mark_uptodate()
6. Changed cleancache_get_page() call to use folio->page
7. Changed ext4_mpage_readpages() to use folio instead of page
8. Changed f2fs_read_single_page() to use folio instead of page
9. Changed btrfs_do_readpage() to use folio instead of page

Bug: 271544708
Bug: 323283126
Bug: 270089503
Change-Id: I93359509f7799de72f31b002a2539565d1bda9d6
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
[ quic_pbrahma@quicinc.com: Resolved merge conflicts and
modified apis to use folio instead of page ]
Signed-off-by: Pratyush Brahma <quic_pbrahma@quicinc.com>
---
 MAINTAINERS                |   7 +
 block/bdev.c               |   5 +
 fs/btrfs/extent_io.c       |  13 +-
 fs/btrfs/super.c           |   2 +
 fs/ext4/readpage.c         |   6 +
 fs/ext4/super.c            |   3 +
 fs/f2fs/data.c             |  14 ++
 fs/mpage.c                 |   7 +
 fs/ntfs3/ntfs_fs.h         |   1 +
 fs/ocfs2/super.c           |   2 +
 fs/super.c                 |   3 +
 include/linux/cleancache.h | 124 +++++++++++++++
 include/linux/fs.h         |   5 +
 mm/Kconfig                 |  22 +++
 mm/Makefile                |   1 +
 mm/cleancache.c            | 315 +++++++++++++++++++++++++++++++++++++
 mm/filemap.c               |  11 ++
 mm/truncate.c              |  15 +-
 18 files changed, 553 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/cleancache.h
 create mode 100644 mm/cleancache.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f8b203f02e62..64be0770bc4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5548,6 +5548,13 @@ F:	scripts/Makefile.clang
 F:	scripts/clang-tools/
 K:	\b(?i:clang|llvm)\b
 
+CLEANCACHE API
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	include/linux/cleancache.h
+F:	mm/cleancache.c
+
 CLK API
 M:	Russell King <linux@armlinux.org.uk>
 L:	linux-clk@vger.kernel.org
diff --git a/block/bdev.c b/block/bdev.c
index 738e3c8457e7..134e3db98bc8 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -25,6 +25,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <linux/security.h>
+#include <linux/cleancache.h>
 #include <linux/part_stat.h>
 #include <linux/uaccess.h>
 #include <linux/stat.h>
@@ -101,6 +102,10 @@ void invalidate_bdev(struct block_device *bdev)
 		lru_add_drain_all();	/* make sure all lru add caches are flushed */
 		invalidate_mapping_pages(mapping, 0, -1);
 	}
+	/* 99% of the time, we don't need to flush the cleancache on the bdev.
+	 * But, for the strange corners, lets be cautious
+	 */
+	cleancache_invalidate_inode(mapping);
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 872cca54cc6c..2355cf682b1b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,6 +13,7 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/prefetch.h>
+#include <linux/cleancache.h>
 #include <linux/fsverity.h>
 #include "extent_io.h"
 #include "extent-io-tree.h"
@@ -956,6 +957,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 	size_t pg_offset = 0;
 	size_t iosize;
 	size_t blocksize = fs_info->sectorsize;
+	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
 	ret = set_folio_extent_mapped(folio);
 	if (ret < 0) {
@@ -963,6 +965,15 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		return ret;
 	}
 
+	if (!folio_test_uptodate(folio)) {
+		if (cleancache_get_page(&folio->page) == 0) {
+			BUG_ON(blocksize != folio_size(folio));
+			unlock_extent(tree, start, end, NULL);
+			folio_unlock(folio);
+			goto out;
+		}
+	}
+
 	if (folio->index == last_byte >> folio_shift(folio)) {
 		size_t zero_offset = offset_in_folio(folio, last_byte);
 
@@ -1081,7 +1092,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
-
+out:
 	return 0;
 }
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c64d07134122..644039e45d21 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -23,6 +23,7 @@
 #include <linux/miscdevice.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/cleancache.h>
 #include <linux/ratelimit.h>
 #include <linux/crc32c.h>
 #include <linux/btrfs.h>
@@ -990,6 +991,7 @@ static int btrfs_fill_super(struct super_block *sb,
 		goto fail_close;
 	}
 
+	cleancache_init_fs(sb);
 	sb->s_flags |= SB_ACTIVE;
 	return 0;
 
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5d3a9dc9a32d..f9a15a79c1b2 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -43,6 +43,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/cleancache.h>
 
 #include "ext4.h"
 
@@ -334,6 +335,11 @@ int ext4_mpage_readpages(struct inode *inode,
 		} else if (fully_mapped) {
 			folio_set_mappedtodisk(folio);
 		}
+		if (fully_mapped && blocks_per_page == 1 &&
+		    !folio_test_uptodate(folio) && cleancache_get_page(&folio->page) == 0) {
+			folio_mark_uptodate(folio);
+			goto confused;
+		}
 
 		/*
 		 * This folio will go to BIO.  Do we need to send this
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 16a4ce704460..3154157603e9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -39,6 +39,7 @@
 #include <linux/log2.h>
 #include <linux/crc16.h>
 #include <linux/dax.h>
+#include <linux/cleancache.h>
 #include <linux/uaccess.h>
 #include <linux/iversion.h>
 #include <linux/unicode.h>
@@ -3105,6 +3106,8 @@ done:
 			EXT4_BLOCKS_PER_GROUP(sb),
 			EXT4_INODES_PER_GROUP(sb),
 			sbi->s_mount_opt, sbi->s_mount_opt2);
+
+	cleancache_init_fs(sb);
 	return err;
 }
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a41350821099..fc944e8a30ed 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -17,6 +17,7 @@
 #include <linux/swap.h>
 #include <linux/prefetch.h>
 #include <linux/uio.h>
+#include <linux/cleancache.h>
 #include <linux/sched/signal.h>
 #include <linux/fiemap.h>
 #include <linux/iomap.h>
@@ -2134,6 +2135,12 @@ got_it:
 		block_nr = map->m_pblk + block_in_file - map->m_lblk;
 		folio_set_mappedtodisk(folio);
 
+		if (!!folio_test_uptodate(folio) && (!folio_test_swapcache(folio) &&
+					!cleancache_get_page(&folio->page))) {
+			folio_mark_uptodate(folio);
+			goto confused;
+		}
+
 		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
 						DATA_GENERIC_ENHANCE_READ)) {
 			ret = -EFSCORRUPTED;
@@ -2188,6 +2195,13 @@ submit_and_realloc:
 	f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
 							F2FS_BLKSIZE);
 	*last_block_in_bio = block_nr;
+	goto out;
+confused:
+	if (bio) {
+		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+		bio = NULL;
+	}
+	folio_unlock(folio);
 out:
 	*bio_ret = bio;
 	return ret;
diff --git a/fs/mpage.c b/fs/mpage.c
index b5b5ddf9d513..bae6050022d6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -29,6 +29,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/cleancache.h>
 #include "internal.h"
 
 /*
@@ -279,6 +280,12 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 		folio_set_mappedtodisk(folio);
 	}
 
+	if (fully_mapped && blocks_per_page == 1 && !folio_test_uptodate(folio) &&
+	    cleancache_get_page(&folio->page) == 0) {
+		folio_mark_uptodate(folio);
+		goto confused;
+	}
+
 	/*
 	 * This folio will go to BIO.  Do we need to send this BIO off first?
 	 */
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 26e1e1379c04..f237baad6702 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -11,6 +11,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
+#include <linux/cleancache.h>
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/kernel.h>
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c79b4291777f..7ff326a6c8fc 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
+#include <linux/cleancache.h>
 #include <linux/signal.h>
 
 #define CREATE_TRACE_POINTS
@@ -2274,6 +2275,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		mlog_errno(status);
 		goto out_system_inodes;
 	}
+	cleancache_init_shared_fs(sb);
 
 	osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
 	if (!osb->ocfs2_wq) {
diff --git a/fs/super.c b/fs/super.c
index c9c7223bc2a2..6a68a14b0313 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
 #include <linux/rculist_bl.h>
+#include <linux/cleancache.h>
 #include <linux/fscrypt.h>
 #include <linux/fsnotify.h>
 #include <linux/lockdep.h>
@@ -374,6 +375,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	s->s_time_gran = 1000000000;
 	s->s_time_min = TIME64_MIN;
 	s->s_time_max = TIME64_MAX;
+	s->cleancache_poolid = CLEANCACHE_NO_POOL;
 
 	s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
 				     "sb-%s", type->name);
@@ -469,6 +471,7 @@ void deactivate_locked_super(struct super_block *s)
 {
 	struct file_system_type *fs = s->s_type;
 	if (atomic_dec_and_test(&s->s_active)) {
+		cleancache_invalidate_fs(s);
 		shrinker_free(s->s_shrink);
 		fs->kill_sb(s);
 
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
new file mode 100644
index 000000000000..5f5730c1d324
--- /dev/null
+++ b/include/linux/cleancache.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CLEANCACHE_H
+#define _LINUX_CLEANCACHE_H
+
+#include <linux/fs.h>
+#include <linux/exportfs.h>
+#include <linux/mm.h>
+
+#define CLEANCACHE_NO_POOL		-1
+#define CLEANCACHE_NO_BACKEND		-2
+#define CLEANCACHE_NO_BACKEND_SHARED	-3
+
+#define CLEANCACHE_KEY_MAX 6
+
+/*
+ * cleancache requires every file with a page in cleancache to have a
+ * unique key unless/until the file is removed/truncated.  For some
+ * filesystems, the inode number is unique, but for "modern" filesystems
+ * an exportable filehandle is required (see exportfs.h)
+ */
+struct cleancache_filekey {
+	union {
+		ino_t ino;
+		__u32 fh[CLEANCACHE_KEY_MAX];
+		u32 key[CLEANCACHE_KEY_MAX];
+	} u;
+};
+
+struct cleancache_ops {
+	int (*init_fs)(size_t);
+	int (*init_shared_fs)(uuid_t *uuid, size_t);
+	int (*get_page)(int, struct cleancache_filekey,
+			pgoff_t, struct page *);
+	void (*put_page)(int, struct cleancache_filekey,
+			pgoff_t, struct page *);
+	void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t);
+	void (*invalidate_inode)(int, struct cleancache_filekey);
+	void (*invalidate_fs)(int);
+};
+
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
+extern void __cleancache_init_fs(struct super_block *);
+extern void __cleancache_init_shared_fs(struct super_block *);
+extern int  __cleancache_get_page(struct page *);
+extern void __cleancache_put_page(struct page *);
+extern void __cleancache_invalidate_page(struct address_space *, struct page *);
+extern void __cleancache_invalidate_inode(struct address_space *);
+extern void __cleancache_invalidate_fs(struct super_block *);
+
+#ifdef CONFIG_CLEANCACHE
+#define cleancache_enabled (1)
+static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
+{
+	return mapping->host->i_sb->cleancache_poolid >= 0;
+}
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return cleancache_fs_enabled_mapping(page->mapping);
+}
+#else
+#define cleancache_enabled (0)
+#define cleancache_fs_enabled(_page) (0)
+#define cleancache_fs_enabled_mapping(_page) (0)
+#endif
+
+/*
+ * The shim layer provided by these inline functions allows the compiler
+ * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
+ * is disabled, to a single global variable check if CONFIG_CLEANCACHE
+ * is enabled but no cleancache "backend" has dynamically enabled it,
+ * and, for the most frequent cleancache ops, to a single global variable
+ * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
+ * and a cleancache backend has dynamically enabled cleancache, but the
+ * filesystem referenced by that cleancache op has not enabled cleancache.
+ * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
+ * no measurable performance impact.
+ */
+
+static inline void cleancache_init_fs(struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_init_fs(sb);
+}
+
+static inline void cleancache_init_shared_fs(struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_init_shared_fs(sb);
+}
+
+static inline int cleancache_get_page(struct page *page)
+{
+	if (cleancache_enabled && cleancache_fs_enabled(page))
+		return __cleancache_get_page(page);
+	return -1;
+}
+
+static inline void cleancache_put_page(struct page *page)
+{
+	if (cleancache_enabled && cleancache_fs_enabled(page))
+		__cleancache_put_page(page);
+}
+
+static inline void cleancache_invalidate_page(struct address_space *mapping,
+					struct page *page)
+{
+	/* careful... page->mapping is NULL sometimes when this is called */
+	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
+		__cleancache_invalidate_page(mapping, page);
+}
+
+static inline void cleancache_invalidate_inode(struct address_space *mapping)
+{
+	if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
+		__cleancache_invalidate_inode(mapping);
+}
+
+static inline void cleancache_invalidate_fs(struct super_block *sb)
+{
+	if (cleancache_enabled)
+		__cleancache_invalidate_fs(sb);
+}
+
+#endif /* _LINUX_CLEANCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3559446279c1..661f0034c433 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1345,6 +1345,11 @@ struct super_block {
 
 	const struct dentry_operations *s_d_op; /* default d_op for dentries */
 
+	/*
+	 * Saved pool identifier for cleancache (-1 means none)
+	 */
+	int cleancache_poolid;
+
 	struct shrinker *s_shrink;	/* per-sb shrinker handle */
 
 	/* Number of inodes with nlink == 0 but still referenced */
diff --git a/mm/Kconfig b/mm/Kconfig
index 520e12b99148..6e187ffd607d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -929,6 +929,28 @@ config USE_PERCPU_NUMA_NODE_ID
 config HAVE_SETUP_PER_CPU_AREA
 	bool
 
+config CLEANCACHE
+	bool "Enable cleancache driver to cache clean pages if tmem is present"
+	help
+	  Cleancache can be thought of as a page-granularity victim cache
+	  for clean pages that the kernel's pageframe replacement algorithm
+	  (PFRA) would like to keep around, but can't since there isn't enough
+	  memory.  So when the PFRA "evicts" a page, it first attempts to use
+	  cleancache code to put the data contained in that page into
+	  "transcendent memory", memory that is not directly accessible or
+	  addressable by the kernel and is of unknown and possibly
+	  time-varying size.  And when a cleancache-enabled
+	  filesystem wishes to access a page in a file on disk, it first
+	  checks cleancache to see if it already contains it; if it does,
+	  the page is copied into the kernel and a disk access is avoided.
+	  When a transcendent memory driver is available (such as zcache or
+	  Xen transcendent memory), a significant I/O reduction
+	  may be achieved.  When none is available, all cleancache calls
+	  are reduced to a single pointer-compare-against-NULL resulting
+	  in a negligible performance hit.
+
+	  If unsure, say Y to enable cleancache
+
 config CMA
 	bool "Contiguous Memory Allocator"
 	depends on MMU
diff --git a/mm/Makefile b/mm/Makefile
index d5639b036166..90818d75432d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
 obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
 obj-$(CONFIG_PAGE_OWNER) += page_owner.o
+obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
 obj-$(CONFIG_ZPOOL)	+= zpool.o
 obj-$(CONFIG_ZBUD)	+= zbud.o
diff --git a/mm/cleancache.c b/mm/cleancache.c
new file mode 100644
index 000000000000..db7eee9c0886
--- /dev/null
+++ b/mm/cleancache.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Cleancache frontend
+ *
+ * This code provides the generic "frontend" layer to call a matching
+ * "backend" driver implementation of cleancache.  See
+ * Documentation/vm/cleancache.rst for more information.
+ *
+ * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
+ * Author: Dan Magenheimer
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/exportfs.h>
+#include <linux/mm.h>
+#include <linux/debugfs.h>
+#include <linux/cleancache.h>
+
+/*
+ * cleancache_ops is set by cleancache_register_ops to contain the pointers
+ * to the cleancache "backend" implementation functions.
+ */
+static const struct cleancache_ops *cleancache_ops __read_mostly;
+
+/*
+ * Counters available via /sys/kernel/debug/cleancache (if debugfs is
+ * properly configured.  These are for information only so are not protected
+ * against increment races.
+ */
+static u64 cleancache_succ_gets;
+static u64 cleancache_failed_gets;
+static u64 cleancache_puts;
+static u64 cleancache_invalidates;
+
+static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
+{
+	switch (sb->cleancache_poolid) {
+	case CLEANCACHE_NO_BACKEND:
+		__cleancache_init_fs(sb);
+		break;
+	case CLEANCACHE_NO_BACKEND_SHARED:
+		__cleancache_init_shared_fs(sb);
+		break;
+	}
+}
+
+/*
+ * Register operations for cleancache. Returns 0 on success.
+ */
+int cleancache_register_ops(const struct cleancache_ops *ops)
+{
+	if (cmpxchg(&cleancache_ops, NULL, ops))
+		return -EBUSY;
+
+	/*
+	 * A cleancache backend can be built as a module and hence loaded after
+	 * a cleancache enabled filesystem has called cleancache_init_fs. To
+	 * handle such a scenario, here we call ->init_fs or ->init_shared_fs
+	 * for each active super block. To differentiate between local and
+	 * shared filesystems, we temporarily initialize sb->cleancache_poolid
+	 * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
+	 * respectively in case there is no backend registered at the time
+	 * cleancache_init_fs or cleancache_init_shared_fs is called.
+	 *
+	 * Since filesystems can be mounted concurrently with cleancache
+	 * backend registration, we have to be careful to guarantee that all
+	 * cleancache enabled filesystems that has been mounted by the time
+	 * cleancache_register_ops is called has got and all mounted later will
+	 * get cleancache_poolid. This is assured by the following statements
+	 * tied together:
+	 *
+	 * a) iterate_supers skips only those super blocks that has started
+	 *    ->kill_sb
+	 *
+	 * b) if iterate_supers encounters a super block that has not finished
+	 *    ->mount yet, it waits until it is finished
+	 *
+	 * c) cleancache_init_fs is called from ->mount and
+	 *    cleancache_invalidate_fs is called from ->kill_sb
+	 *
+	 * d) we call iterate_supers after cleancache_ops has been set
+	 *
+	 * From a) it follows that if iterate_supers skips a super block, then
+	 * either the super block is already dead, in which case we do not need
+	 * to bother initializing cleancache for it, or it was mounted after we
+	 * initiated iterate_supers. In the latter case, it must have seen
+	 * cleancache_ops set according to d) and initialized cleancache from
+	 * ->mount by itself according to c). This proves that we call
+	 * ->init_fs at least once for each active super block.
+	 *
+	 * From b) and c) it follows that if iterate_supers encounters a super
+	 * block that has already started ->init_fs, it will wait until ->mount
+	 * and hence ->init_fs has finished, then check cleancache_poolid, see
+	 * that it has already been set and therefore do nothing. This proves
+	 * that we call ->init_fs no more than once for each super block.
+	 *
+	 * Combined together, the last two paragraphs prove the function
+	 * correctness.
+	 *
+	 * Note that various cleancache callbacks may proceed before this
+	 * function is called or even concurrently with it, but since
+	 * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
+	 * until the corresponding ->init_fs has been actually called and
+	 * cleancache_ops has been set.
+	 */
+	iterate_supers(cleancache_register_ops_sb, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(cleancache_register_ops);
+
+/* Called by a cleancache-enabled filesystem at time of mount */
+void __cleancache_init_fs(struct super_block *sb)
+{
+	int pool_id = CLEANCACHE_NO_BACKEND;
+
+	if (cleancache_ops) {
+		pool_id = cleancache_ops->init_fs(PAGE_SIZE);
+		if (pool_id < 0)
+			pool_id = CLEANCACHE_NO_POOL;
+	}
+	sb->cleancache_poolid = pool_id;
+}
+EXPORT_SYMBOL(__cleancache_init_fs);
+
+/* Called by a cleancache-enabled clustered filesystem at time of mount */
+void __cleancache_init_shared_fs(struct super_block *sb)
+{
+	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
+
+	if (cleancache_ops) {
+		pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
+		if (pool_id < 0)
+			pool_id = CLEANCACHE_NO_POOL;
+	}
+	sb->cleancache_poolid = pool_id;
+}
+EXPORT_SYMBOL(__cleancache_init_shared_fs);
+
+/*
+ * If the filesystem uses exportable filehandles, use the filehandle as
+ * the key, else use the inode number.
+ */
+static int cleancache_get_key(struct inode *inode,
+			      struct cleancache_filekey *key)
+{
+	int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
+	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
+	struct super_block *sb = inode->i_sb;
+
+	key->u.ino = inode->i_ino;
+	if (sb->s_export_op != NULL) {
+		fhfn = sb->s_export_op->encode_fh;
+		if  (fhfn) {
+			len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
+			if (len <= FILEID_ROOT || len == FILEID_INVALID)
+				return -1;
+			if (maxlen > CLEANCACHE_KEY_MAX)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * "Get" data from cleancache associated with the poolid/inode/index
+ * that were specified when the data was put to cleanache and, if
+ * successful, use it to fill the specified page with data and return 0.
+ * The pageframe is unchanged and returns -1 if the get fails.
+ * Page must be locked by caller.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
+ */
+int __cleancache_get_page(struct page *page)
+{
+	int ret = -1;
+	int pool_id;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (!cleancache_ops) {
+		cleancache_failed_gets++;
+		goto out;
+	}
+
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
+	if (pool_id < 0)
+		goto out;
+
+	if (cleancache_get_key(page->mapping->host, &key) < 0)
+		goto out;
+
+	ret = cleancache_ops->get_page(pool_id, key, page->index, page);
+	if (ret == 0)
+		cleancache_succ_gets++;
+	else
+		cleancache_failed_gets++;
+out:
+	return ret;
+}
+EXPORT_SYMBOL(__cleancache_get_page);
+
+/*
+ * "Put" data from a page to cleancache and associate it with the
+ * (previously-obtained per-filesystem) poolid and the page's,
+ * inode and page index.  Page must be locked.  Note that a put_page
+ * always "succeeds", though a subsequent get_page may succeed or fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
+ */
+void __cleancache_put_page(struct page *page)
+{
+	int pool_id;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (!cleancache_ops) {
+		cleancache_puts++;
+		return;
+	}
+
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
+	if (pool_id >= 0 &&
+		cleancache_get_key(page->mapping->host, &key) >= 0) {
+		cleancache_ops->put_page(pool_id, key, page->index, page);
+		cleancache_puts++;
+	}
+}
+EXPORT_SYMBOL(__cleancache_put_page);
+
+/*
+ * Invalidate any data from cleancache associated with the poolid and the
+ * page's inode and page index so that a subsequent "get" will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
+ */
+void __cleancache_invalidate_page(struct address_space *mapping,
+					struct page *page)
+{
+	/* careful... page->mapping is NULL sometimes when this is called */
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (!cleancache_ops)
+		return;
+
+	if (pool_id >= 0) {
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		if (cleancache_get_key(mapping->host, &key) >= 0) {
+			cleancache_ops->invalidate_page(pool_id,
+					key, page->index);
+			cleancache_invalidates++;
+		}
+	}
+}
+EXPORT_SYMBOL(__cleancache_invalidate_page);
+
+/*
+ * Invalidate all data from cleancache associated with the poolid and the
+ * mappings's inode so that all subsequent gets to this poolid/inode
+ * will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
+ */
+void __cleancache_invalidate_inode(struct address_space *mapping)
+{
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
+	struct cleancache_filekey key = { .u.key = { 0 } };
+
+	if (!cleancache_ops)
+		return;
+
+	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
+		cleancache_ops->invalidate_inode(pool_id, key);
+}
+EXPORT_SYMBOL(__cleancache_invalidate_inode);
+
+/*
+ * Called by any cleancache-enabled filesystem at time of unmount;
+ * note that pool_id is surrendered and may be returned by a subsequent
+ * cleancache_init_fs or cleancache_init_shared_fs.
+ */
+void __cleancache_invalidate_fs(struct super_block *sb)
+{
+	int pool_id;
+
+	pool_id = sb->cleancache_poolid;
+	sb->cleancache_poolid = CLEANCACHE_NO_POOL;
+
+	if (cleancache_ops && pool_id >= 0)
+		cleancache_ops->invalidate_fs(pool_id);
+}
+EXPORT_SYMBOL(__cleancache_invalidate_fs);
+
+static int __init init_cleancache(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *root = debugfs_create_dir("cleancache", NULL);
+
+	debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets);
+	debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets);
+	debugfs_create_u64("puts", 0444, root, &cleancache_puts);
+	debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates);
+#endif
+	return 0;
+}
+module_init(init_cleancache)
diff --git a/mm/filemap.c b/mm/filemap.c
index 56fa431c52af..d7e89f65ea9c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -36,6 +36,7 @@
 #include <linux/cpuset.h>
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
+#include <linux/cleancache.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include <linux/delayacct.h>
@@ -160,6 +161,16 @@ static void filemap_unaccount_folio(struct address_space *mapping,
 {
 	long nr;
 
+	/*
+	 * if we're uptodate, flush out into the cleancache, otherwise
+	 * invalidate any existing cleancache entries.  We can't leave
+	 * stale data around in the cleancache once our page is gone
+	 */
+	if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio))
+		cleancache_put_page(&folio->page);
+	else
+		cleancache_invalidate_page(mapping, &folio->page);
+
 	VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
 	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
 		pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
diff --git a/mm/truncate.c b/mm/truncate.c
index 0668cd340a46..12f9e3219acd 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/pagevec.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/shmem_fs.h>
+#include <linux/cleancache.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -219,6 +220,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 	if (!mapping_inaccessible(folio->mapping))
 		folio_zero_range(folio, offset, length);
 
+	cleancache_invalidate_page(folio->mapping, &folio->page);
 	if (folio_needs_release(folio))
 		folio_invalidate(folio, offset, length);
 	if (!folio_test_large(folio))
@@ -314,7 +316,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	bool		same_folio;
 
 	if (mapping_empty(mapping))
-		return;
+		goto out;
 
 	/*
 	 * 'start' and 'end' always covers the range of pages to be fully
@@ -402,6 +404,9 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
 		folio_batch_release(&fbatch);
 	}
+
+out:
+	cleancache_invalidate_inode(mapping);
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
 
@@ -455,6 +460,10 @@ void truncate_inode_pages_final(struct address_space *mapping)
 		xa_unlock_irq(&mapping->i_pages);
 	}
 
+	/*
+	 * Cleancache needs notification even if there are no pages or shadow
+	 * entries.
+	 */
 	truncate_inode_pages(mapping, 0);
 }
 EXPORT_SYMBOL(truncate_inode_pages_final);
@@ -608,7 +617,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	bool xa_has_values = false;
 
 	if (mapping_empty(mapping))
-		return 0;
+		goto out;
 
 	folio_batch_init(&fbatch);
 	index = start;
@@ -675,6 +684,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	if (dax_mapping(mapping)) {
 		unmap_mapping_pages(mapping, start, end - start + 1, false);
 	}
+out:
+	cleancache_invalidate_inode(mapping);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);