diff --git a/include/linux/mman.h b/include/linux/mman.h index a842783ffa62..9a66491b6e97 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -152,7 +152,7 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long -calc_vm_flag_bits(struct file *file, unsigned long flags) +__calc_vm_flag_bits(struct file *file, unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | diff --git a/include/linux/page_size_compat.h b/include/linux/page_size_compat.h index 019db666ffcd..944a708fc0ec 100644 --- a/include/linux/page_size_compat.h +++ b/include/linux/page_size_compat.h @@ -87,14 +87,60 @@ static __always_inline unsigned __page_shift(void) #define __PAGE_SIZE_ROUND_UP_ADJ(size) \ ((size) + (((1 << (__PAGE_SHIFT - PAGE_SHIFT)) - 1) << PAGE_SHIFT)) -/* VMA is exempt from emulated page align requirements */ +/* + * VMA is exempt from emulated page align requirements + * + * NOTE: __MAP_NO_COMPAT is not new UABI; it is only ever set by the kernel + * in ___filemap_fixup() + */ #define __VM_NO_COMPAT (_AC(1,ULL) << 63) #define __MAP_NO_COMPAT (_AC(1,ULL) << 63) -/* Combine the mmap "flags" argument into "vm_flags" add translation of the no-compat flag. */ -static inline unsigned long __calc_vm_flag_bits(unsigned long flags) +/* + * Conditional page-alignment based on mmap flags + * + * If the VMA is allowed to not respect the emulated page size, align using the + * base PAGE_SIZE, else align using the emulated __PAGE_SIZE. + */ +#define __COMPAT_PAGE_ALIGN(size, flags) \ + (((flags) & __MAP_NO_COMPAT) ? PAGE_ALIGN(size) : __PAGE_ALIGN(size)) + +/* + * Combines the mmap "flags" argument into "vm_flags" + * + * If page size emulation is enabled, adds translation of the no-compat flag. 
+ */ +static __always_inline unsigned long calc_vm_flag_bits(struct file *file, unsigned long flags) { - return calc_vm_flag_bits(flags) | _calc_vm_trans(flags, __MAP_NO_COMPAT, __VM_NO_COMPAT ); + unsigned long flag_bits = __calc_vm_flag_bits(file, flags); + + if (static_branch_unlikely(&page_shift_compat_enabled)) + flag_bits |= _calc_vm_trans(flags, __MAP_NO_COMPAT, __VM_NO_COMPAT ); + + return flag_bits; +} + +extern unsigned long ___filemap_len(struct inode *inode, unsigned long pgoff, + unsigned long len, unsigned long flags); + +extern void ___filemap_fixup(unsigned long addr, unsigned long prot, unsigned long old_len, + unsigned long new_len); + +static __always_inline unsigned long __filemap_len(struct inode *inode, unsigned long pgoff, + unsigned long len, unsigned long flags) +{ + if (static_branch_unlikely(&page_shift_compat_enabled)) + return ___filemap_len(inode, pgoff, len, flags); + + return len; +} + +static __always_inline void __filemap_fixup(unsigned long addr, unsigned long prot, + unsigned long old_len, unsigned long new_len) +{ + + if (static_branch_unlikely(&page_shift_compat_enabled)) + ___filemap_fixup(addr, prot, old_len, new_len); } #endif /* __LINUX_PAGE_SIZE_COMPAT_H */ diff --git a/mm/mmap.c b/mm/mmap.c index bc6948ca5f8b..95985998e1c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -290,6 +290,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long pgoff, unsigned long *populate, struct list_head *uf) { + unsigned long old_len; struct mm_struct *mm = current->mm; int pkey = 0; @@ -316,10 +317,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr, addr = round_hint_to_min(addr); /* Careful about overflows.. */ - len = PAGE_ALIGN(len); + len = __COMPAT_PAGE_ALIGN(len, flags); if (!len) return -ENOMEM; + /* Save the requested len */ + old_len = len; + /* offset overflow? 
*/ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EOVERFLOW; @@ -378,6 +382,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (!file_mmap_ok(file, inode, pgoff, len)) return -EOVERFLOW; + len = __filemap_len(inode, pgoff, len, flags); /* may shrink len so the file mapping stops at EOF */ + flags_mask = LEGACY_MAP_MASK; if (file->f_op->fop_flags & FOP_MMAP_SYNC) flags_mask |= MAP_SYNC; @@ -505,6 +511,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; + + __filemap_fixup(addr, prot, old_len, len); + return addr; } diff --git a/mm/nommu.c b/mm/nommu.c index 362116ba6b05..61936fa0f08b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/page_size_compat.c b/mm/page_size_compat.c index 88708c3f4779..89bbd9ec5e0c 100644 --- a/mm/page_size_compat.c +++ b/mm/page_size_compat.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #define MIN_PAGE_SHIFT_COMPAT (PAGE_SHIFT + 1) @@ -56,3 +57,93 @@ static int __init init_mmap_rnd_bits(void) return 0; } core_initcall(init_mmap_rnd_bits); + +/* + * Updates len to avoid mapping off the end of the file. + * + * The length of the original mapping must be updated before + * its VMA is created to avoid an unaligned munmap in the + * MAP_FIXED fixup mapping. + */ +unsigned long ___filemap_len(struct inode *inode, unsigned long pgoff, unsigned long len, + unsigned long flags) +{ + unsigned long file_size; + unsigned long new_len; + pgoff_t max_pgcount; + pgoff_t last_pgoff; + + if (flags & __MAP_NO_COMPAT) + return len; + + file_size = (unsigned long) i_size_read(inode); + + /* + * Round up, so that this is a count (not an index). This simplifies + * the following calculations. 
*/ + max_pgcount = DIV_ROUND_UP(file_size, PAGE_SIZE); + last_pgoff = pgoff + (len >> PAGE_SHIFT); + + if (unlikely(last_pgoff >= max_pgcount)) { + new_len = (max_pgcount - pgoff) << PAGE_SHIFT; + /* Careful of underflows in special files */ + if (new_len > 0 && new_len < len) + return new_len; + } + + return len; +} + +/* + * This is called to fill any holes created by ___filemap_len() + * with an anonymous mapping. + */ +void ___filemap_fixup(unsigned long addr, unsigned long prot, unsigned long old_len, + unsigned long new_len) +{ + unsigned long anon_len = old_len - new_len; + unsigned long anon_addr = addr + new_len; + struct mm_struct *mm = current->mm; + unsigned long populate = 0; + struct vm_area_struct *vma; + + if (!anon_len) + return; + + BUG_ON(new_len > old_len); + + /* The original do_mmap() failed */ + if (IS_ERR_VALUE(addr)) + return; + + vma = find_vma(mm, addr); + + /* + * This should never happen: the VMA was just inserted and we + * still hold the mmap write lock. + */ + BUG_ON(!vma); + + /* Only handle fixups for filemap faults */ + if (vma->vm_ops && vma->vm_ops->fault != filemap_fault) + return; + + /* + * Replace the tail of the mapping that extends past the end of + * the file with an anonymous mapping. + */ + anon_addr = do_mmap(NULL, anon_addr, anon_len, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|__MAP_NO_COMPAT, + 0, 0, &populate, NULL); + + if (!IS_ERR_VALUE(anon_addr)) { + struct anon_vma_name *anon_name = anon_vma_name_alloc("filemap_fixup"); + + if (!anon_name) + return; + + /* Label the fixup VMA */ + madvise_set_anon_name(mm, anon_addr, anon_len, anon_name); /* NOTE(review): verify whether anon_vma_name_put() is needed here to drop the alloc reference */ + } +}