Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * mmap.c
4 *
5 * Code to deal with the mess that is clustered mmap.
6 *
7 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
8 */
9
10#include <linux/fs.h>
11#include <linux/types.h>
12#include <linux/highmem.h>
13#include <linux/pagemap.h>
14#include <linux/uio.h>
15#include <linux/signal.h>
16#include <linux/rbtree.h>
17
18#include <cluster/masklog.h>
19
20#include "ocfs2.h"
21
22#include "aops.h"
23#include "dlmglue.h"
24#include "file.h"
25#include "inode.h"
26#include "mmap.h"
27#include "super.h"
28#include "ocfs2_trace.h"
29
30
31static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
32{
33 unsigned long long ip_blkno =
34 OCFS2_I(file_inode(vmf->vma->vm_file))->ip_blkno;
35 sigset_t oldset;
36 vm_fault_t ret;
37
38 ocfs2_block_signals(&oldset);
39 ret = filemap_fault(vmf);
40 ocfs2_unblock_signals(&oldset);
41
42 trace_ocfs2_fault(ip_blkno, vmf->page, vmf->pgoff);
43 return ret;
44}
45static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
46 struct buffer_head *di_bh, struct folio *folio)
47{
48 int err;
49 vm_fault_t ret = VM_FAULT_NOPAGE;
50 struct inode *inode = file_inode(file);
51 struct address_space *mapping = inode->i_mapping;
52 loff_t pos = folio_pos(folio);
53 unsigned int len = PAGE_SIZE;
54 pgoff_t last_index;
55 struct folio *locked_folio = NULL;
56 void *fsdata;
57 loff_t size = i_size_read(inode);
58
59 last_index = (size - 1) >> PAGE_SHIFT;
60
61 /*
62 * There are cases that lead to the page no longer belonging to the
63 * mapping.
64 * 1) pagecache truncates locally due to memory pressure.
65 * 2) pagecache truncates when another is taking EX lock against
66 * inode lock. see ocfs2_data_convert_worker.
67 *
68 * The i_size check doesn't catch the case where nodes truncated and
69 * then re-extended the file. We'll re-check the page mapping after
70 * taking the page lock inside of ocfs2_write_begin_nolock().
71 *
72 * Let VM retry with these cases.
73 */
74 if ((folio->mapping != inode->i_mapping) ||
75 !folio_test_uptodate(folio) ||
76 (pos >= size))
77 goto out;
78
79 /*
80 * Call ocfs2_write_begin() and ocfs2_write_end() to take
81 * advantage of the allocation code there. We pass a write
82 * length of the whole page (chopped to i_size) to make sure
83 * the whole thing is allocated.
84 *
85 * Since we know the page is up to date, we don't have to
86 * worry about ocfs2_write_begin() skipping some buffer reads
87 * because the "write" would invalidate their data.
88 */
89 if (folio->index == last_index)
90 len = ((size - 1) & ~PAGE_MASK) + 1;
91
92 err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
93 &locked_folio, &fsdata, di_bh, folio);
94 if (err) {
95 if (err != -ENOSPC)
96 mlog_errno(err);
97 ret = vmf_error(err);
98 goto out;
99 }
100
101 if (!locked_folio) {
102 ret = VM_FAULT_NOPAGE;
103 goto out;
104 }
105 err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
106 BUG_ON(err != len);
107 ret = VM_FAULT_LOCKED;
108out:
109 return ret;
110}
111
112static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
113{
114 struct folio *folio = page_folio(vmf->page);
115 struct inode *inode = file_inode(vmf->vma->vm_file);
116 struct buffer_head *di_bh = NULL;
117 sigset_t oldset;
118 int err;
119 vm_fault_t ret;
120
121 sb_start_pagefault(inode->i_sb);
122 ocfs2_block_signals(&oldset);
123
124 /*
125 * The cluster locks taken will block a truncate from another
126 * node. Taking the data lock will also ensure that we don't
127 * attempt page truncation as part of a downconvert.
128 */
129 err = ocfs2_inode_lock(inode, &di_bh, 1);
130 if (err < 0) {
131 mlog_errno(err);
132 ret = vmf_error(err);
133 goto out;
134 }
135
136 /*
137 * The alloc sem should be enough to serialize with
138 * ocfs2_truncate_file() changing i_size as well as any thread
139 * modifying the inode btree.
140 */
141 down_write(&OCFS2_I(inode)->ip_alloc_sem);
142
143 ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, folio);
144
145 up_write(&OCFS2_I(inode)->ip_alloc_sem);
146
147 brelse(di_bh);
148 ocfs2_inode_unlock(inode, 1);
149
150out:
151 ocfs2_unblock_signals(&oldset);
152 sb_end_pagefault(inode->i_sb);
153 return ret;
154}
155
156static const struct vm_operations_struct ocfs2_file_vm_ops = {
157 .fault = ocfs2_fault,
158 .page_mkwrite = ocfs2_page_mkwrite,
159};
160
161int ocfs2_mmap_prepare(struct vm_area_desc *desc)
162{
163 struct file *file = desc->file;
164 int ret = 0, lock_level = 0;
165
166 ret = ocfs2_inode_lock_atime(file_inode(file),
167 file->f_path.mnt, &lock_level, 1);
168 if (ret < 0) {
169 mlog_errno(ret);
170 goto out;
171 }
172 ocfs2_inode_unlock(file_inode(file), lock_level);
173out:
174 desc->vm_ops = &ocfs2_file_vm_ops;
175 return 0;
176}
177