Files
ack-tegra/fs/fuse/passthrough.c
Daniel Rosenberg 412d297361 ANDROID: Fuse: Fix Passthrough and Cache Coherency
Fuse writeback and passthrough have some interactions that require
additional handling.

If there is cached data from previous reads, and a passthrough write
happens, the relevant sections of the cache should be invalidated.
Otherwise the new data is inaccessible to non-passthrough until the
cache otherwise expires.

If there are dirty pages, passthrough read would not be aware of these
changes. First, they must be flushed so that the lower filesystem will
reflect the change. This can't be done directly since there may be
specific logic about how that data gets written that fuse itself does
not know about.

This also enables mmap for passthrough and nonpassthrough at the same
time. Note that if a shared mmap is open for both passthrough and
nonpassthrough, there will be some inconsistencies between data seen on
both, although interactions between file read and write are handled.
This is an improvement on the existing Android interaction.

Change-Id: I45c3efe92849a88ffcf586a1ec6508fed632e1d0
Bug: 372951405
Test: atest android.scopedstorage.cts.general.ScopedStorageDeviceTest
Signed-off-by: Daniel Rosenberg <drosen@google.com>
2025-02-10 16:50:27 -08:00

380 lines
8.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* FUSE passthrough to backing file.
*
* Copyright (c) 2023 CTERA Networks.
*/
#include "fuse_i.h"
#include <linux/file.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/pagemap.h>
static void fuse_file_accessed(struct file *file)
{
struct inode *inode = file_inode(file);
fuse_invalidate_atime(inode);
}
static void fuse_passthrough_end_write(struct file *file, loff_t pos, ssize_t ret)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
struct inode *backing_inode = file_inode(backing_file);
if (!fc->writeback_cache) {
fuse_write_update_attr(inode, pos, ret);
} else {
inode_set_mtime_to_ts(inode, inode_get_mtime(backing_inode));
inode_set_ctime_to_ts(inode, inode_get_ctime(backing_inode));
inode->i_blocks = backing_inode->i_blocks;
i_size_write(inode, i_size_read(backing_inode));
}
if (ret > 0) {
invalidate_inode_pages2_range(inode->i_mapping,
(pos - ret) >> PAGE_SHIFT, pos >> PAGE_SHIFT);
}
}
ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
size_t count = iov_iter_count(iter);
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ff->cred,
.user_file = file,
.accessed = fuse_file_accessed,
};
pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
backing_file, iocb->ki_pos, count);
if (!count)
return 0;
/* Flush any dirtied cache pages from fuse cache */
write_inode_now(file_inode(file), 1);
ret = backing_file_read_iter(backing_file, iter, iocb, iocb->ki_flags,
&ctx);
return ret;
}
ssize_t fuse_passthrough_write_iter(struct kiocb *iocb,
struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct fuse_file *ff = file->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
size_t count = iov_iter_count(iter);
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ff->cred,
.user_file = file,
.end_write = fuse_passthrough_end_write,
};
pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
backing_file, iocb->ki_pos, count);
if (!count)
return 0;
inode_lock(inode);
ret = backing_file_write_iter(backing_file, iter, iocb, iocb->ki_flags,
&ctx);
inode_unlock(inode);
return ret;
}
ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
struct fuse_file *ff = in->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
struct backing_file_ctx ctx = {
.cred = ff->cred,
.user_file = in,
.accessed = fuse_file_accessed,
};
pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
backing_file, ppos ? *ppos : 0, len, flags);
/* Flush any dirtied cache pages from fuse cache */
write_inode_now(file_inode(in), 1);
return backing_file_splice_read(backing_file, ppos, pipe, len, flags,
&ctx);
}
ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
struct fuse_file *ff = out->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
struct inode *inode = file_inode(out);
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ff->cred,
.user_file = out,
.end_write = fuse_passthrough_end_write,
};
pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
backing_file, ppos ? *ppos : 0, len, flags);
inode_lock(inode);
ret = backing_file_splice_write(pipe, backing_file, ppos, len, flags,
&ctx);
inode_unlock(inode);
return ret;
}
ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
struct file *backing_file = fuse_file_passthrough(ff);
struct backing_file_ctx ctx = {
.cred = ff->cred,
.user_file = file,
.accessed = fuse_file_accessed,
};
pr_debug("%s: backing_file=0x%p, start=%lu, end=%lu\n", __func__,
backing_file, vma->vm_start, vma->vm_end);
return backing_file_mmap(backing_file, vma, &ctx);
}
struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
{
if (fb && refcount_inc_not_zero(&fb->count))
return fb;
return NULL;
}
static void fuse_backing_free(struct fuse_backing *fb)
{
pr_debug("%s: fb=0x%p\n", __func__, fb);
if (fb->file)
fput(fb->file);
put_cred(fb->cred);
kfree_rcu(fb, rcu);
}
void fuse_backing_put(struct fuse_backing *fb)
{
if (fb && refcount_dec_and_test(&fb->count))
fuse_backing_free(fb);
}
void fuse_backing_files_init(struct fuse_conn *fc)
{
idr_init(&fc->backing_files_map);
}
static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
{
int id;
idr_preload(GFP_KERNEL);
spin_lock(&fc->lock);
/* FIXME: xarray might be space inefficient */
id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
spin_unlock(&fc->lock);
idr_preload_end();
WARN_ON_ONCE(id == 0);
return id;
}
static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
int id)
{
struct fuse_backing *fb;
spin_lock(&fc->lock);
fb = idr_remove(&fc->backing_files_map, id);
spin_unlock(&fc->lock);
return fb;
}
static int fuse_backing_id_free(int id, void *p, void *data)
{
struct fuse_backing *fb = p;
WARN_ON_ONCE(refcount_read(&fb->count) != 1);
fuse_backing_free(fb);
return 0;
}
void fuse_backing_files_free(struct fuse_conn *fc)
{
idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
idr_destroy(&fc->backing_files_map);
}
int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
{
struct file *file;
struct super_block *backing_sb;
struct fuse_backing *fb = NULL;
int res;
pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
/* Android already restricts access here, and we don't want to grant extra
* Permissions to the daemon */
#if 0
res = -EPERM;
if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
goto out;
#endif
res = -EINVAL;
if (map->flags || map->padding)
goto out;
file = fget_raw(map->fd);
res = -EBADF;
if (!file)
goto out;
backing_sb = file_inode(file)->i_sb;
res = -ELOOP;
if (backing_sb->s_stack_depth >= fc->max_stack_depth)
goto out_fput;
fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL);
res = -ENOMEM;
if (!fb)
goto out_fput;
fb->file = file;
fb->cred = prepare_creds();
refcount_set(&fb->count, 1);
res = fuse_backing_id_alloc(fc, fb);
if (res < 0) {
fuse_backing_free(fb);
fb = NULL;
}
out:
pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);
return res;
out_fput:
fput(file);
goto out;
}
int fuse_backing_close(struct fuse_conn *fc, int backing_id)
{
struct fuse_backing *fb = NULL;
int err;
pr_debug("%s: backing_id=%d\n", __func__, backing_id);
/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
/* Android already restricts access here, and we don't want to grant extra
* Permissions to the daemon */
#if 0
err = -EPERM;
if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
goto out;
#endif
err = -EINVAL;
if (backing_id <= 0)
goto out;
err = -ENOENT;
fb = fuse_backing_id_remove(fc, backing_id);
if (!fb)
goto out;
fuse_backing_put(fb);
err = 0;
out:
pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err);
return err;
}
/*
* Setup passthrough to a backing file.
*
* Returns an fb object with elevated refcount to be stored in fuse inode.
*/
struct fuse_backing *fuse_passthrough_open(struct file *file,
struct inode *inode,
int backing_id)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
struct fuse_backing *fb = NULL;
struct file *backing_file;
int err;
err = -EINVAL;
if (backing_id <= 0)
goto out;
rcu_read_lock();
fb = idr_find(&fc->backing_files_map, backing_id);
fb = fuse_backing_get(fb);
rcu_read_unlock();
err = -ENOENT;
if (!fb)
goto out;
/* Allocate backing file per fuse file to store fuse path */
backing_file = backing_file_open(&file->f_path, file->f_flags,
&fb->file->f_path, fb->cred);
err = PTR_ERR(backing_file);
if (IS_ERR(backing_file)) {
fuse_backing_put(fb);
goto out;
}
err = 0;
ff->passthrough = backing_file;
ff->cred = get_cred(fb->cred);
out:
pr_debug("%s: backing_id=%d, fb=0x%p, backing_file=0x%p, err=%i\n", __func__,
backing_id, fb, ff->passthrough, err);
return err ? ERR_PTR(err) : fb;
}
void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb)
{
pr_debug("%s: fb=0x%p, backing_file=0x%p\n", __func__,
fb, ff->passthrough);
fput(ff->passthrough);
ff->passthrough = NULL;
put_cred(ff->cred);
ff->cred = NULL;
}