1  /* SPDX-License-Identifier: GPL-2.0 */
2  #ifndef _LINUX_FS_H
3  #define _LINUX_FS_H
4  
5  #include <linux/linkage.h>
6  #include <linux/wait_bit.h>
7  #include <linux/kdev_t.h>
8  #include <linux/dcache.h>
9  #include <linux/path.h>
10  #include <linux/stat.h>
11  #include <linux/cache.h>
12  #include <linux/list.h>
13  #include <linux/list_lru.h>
14  #include <linux/llist.h>
15  #include <linux/radix-tree.h>
16  #include <linux/xarray.h>
17  #include <linux/rbtree.h>
18  #include <linux/init.h>
19  #include <linux/pid.h>
20  #include <linux/bug.h>
21  #include <linux/mutex.h>
22  #include <linux/rwsem.h>
23  #include <linux/mm_types.h>
24  #include <linux/capability.h>
25  #include <linux/semaphore.h>
26  #include <linux/fcntl.h>
27  #include <linux/rculist_bl.h>
28  #include <linux/atomic.h>
29  #include <linux/shrinker.h>
30  #include <linux/migrate_mode.h>
31  #include <linux/uidgid.h>
32  #include <linux/lockdep.h>
33  #include <linux/percpu-rwsem.h>
34  #include <linux/workqueue.h>
35  #include <linux/delayed_call.h>
36  #include <linux/uuid.h>
37  #include <linux/errseq.h>
38  #include <linux/ioprio.h>
39  #include <linux/fs_types.h>
40  #include <linux/build_bug.h>
41  #include <linux/stddef.h>
42  #include <linux/mount.h>
43  #include <linux/cred.h>
44  #include <linux/mnt_idmapping.h>
45  #include <linux/slab.h>
46  
47  #include <asm/byteorder.h>
48  #include <uapi/linux/fs.h>
49  
/*
 * Forward declarations.  These types are incomplete at this point, so the
 * declarations that follow can only name them through pointers.
 */
struct backing_dev_info;
struct bdi_writeback;
struct bio;
struct io_comp_batch;
struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
struct kiocb;
struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct fscrypt_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
struct iomap_ops;
78  
/* Initialisation entry points, all marked __init; defined outside this header. */
extern void __init inode_init(void);
extern void __init inode_init_early(void);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);

/*
 * NOTE(review): judging by the names these expose open-file limits; confirm
 * against the definitions, which are not part of this header.
 */
extern unsigned long get_max_files(void);
extern unsigned int sysctl_nr_open;
86  
/* Short alias for __kernel_rwf_t. */
typedef __kernel_rwf_t rwf_t;

struct buffer_head;
/*
 * get_block_t and dio_iodone_t are function types, not pointer-to-function
 * types: a callback of either kind is declared as "get_block_t *fn".
 * Both return int.
 */
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);
typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
			ssize_t bytes, void *private);
94  
/*
 * MAY_* bit flags.  Every value is a distinct single bit, so the flags can be
 * combined with bitwise OR.
 */
#define MAY_EXEC		0x00000001
#define MAY_WRITE		0x00000002
#define MAY_READ		0x00000004
#define MAY_APPEND		0x00000008
#define MAY_ACCESS		0x00000010
#define MAY_OPEN		0x00000020
#define MAY_CHDIR		0x00000040
/* called from RCU mode, don't block */
#define MAY_NOT_BLOCK		0x00000080
104  
/*
 * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
 * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
 *
 * Every FMODE_* value below is a distinct single bit cast to fmode_t with
 * __force, so the flags can be OR-ed together in one fmode_t.
 */

/* file is open for reading */
#define FMODE_READ		((__force fmode_t)0x1)
/* file is open for writing */
#define FMODE_WRITE		((__force fmode_t)0x2)
/* file is seekable */
#define FMODE_LSEEK		((__force fmode_t)0x4)
/* file can be accessed using pread */
#define FMODE_PREAD		((__force fmode_t)0x8)
/* file can be accessed using pwrite */
#define FMODE_PWRITE		((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC		((__force fmode_t)0x20)
/* (bits 0x40, 0x80 and 0x100 are not assigned in this list) */
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH         ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH         ((__force fmode_t)0x400)

/*
 * Don't update ctime and mtime.
 *
 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
 */
#define FMODE_NOCMTIME		((__force fmode_t)0x800)

/* Expect random access pattern */
#define FMODE_RANDOM		((__force fmode_t)0x1000)

/* File is huge (e.g. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)

/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH		((__force fmode_t)0x4000)

/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
/* Write access to underlying fs */
#define FMODE_WRITER		((__force fmode_t)0x10000)
/* Has read method(s) */
#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
/* Has write method(s) */
#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)

/*
 * NOTE(review): FMODE_OPENED and FMODE_CREATED carry no description here;
 * presumably they record that the file was opened / newly created during
 * the open path -- confirm against their users.
 */
#define FMODE_OPENED		((__force fmode_t)0x80000)
#define FMODE_CREATED		((__force fmode_t)0x100000)

/* File is stream-like */
#define FMODE_STREAM		((__force fmode_t)0x200000)

/* File supports DIRECT IO */
#define	FMODE_CAN_ODIRECT	((__force fmode_t)0x400000)

/* NOTE(review): undocumented here; meaning must be taken from its users. */
#define	FMODE_NOREUSE		((__force fmode_t)0x800000)

/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)

/* File is embedded in backing_file object */
#define FMODE_BACKING		((__force fmode_t)0x2000000)

/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY		((__force fmode_t)0x4000000)

/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT		((__force fmode_t)0x8000000)

/* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT	((__force fmode_t)0x10000000)

/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)

/* File supports async buffered reads */
#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)

/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC	((__force fmode_t)0x80000000)
187  
/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 *
 * Each flag is a single bit (1 << n).  Bit 10 is not assigned in this list.
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
#define ATTR_TOUCH	(1 << 17)
209  
/*
 * Whiteout is represented by a char device.  The following constants define the
 * mode and device number to use.
 *
 * Both constants are 0.  NOTE(review): callers presumably combine
 * WHITEOUT_MODE with the char-device file type bit themselves -- confirm.
 */
#define WHITEOUT_MODE 0
#define WHITEOUT_DEV 0
216  
/*
 * This is the Inode Attributes structure, used for notify_change().  It
 * uses the above definitions as flags, to know which values have changed.
 * Also, in this manner, a Filesystem can look at only the values it cares
 * about.  Basically, these are the attributes that the VFS layer can
 * request to change from the FS layer.
 *
 * Derek Atkins <warlord@MIT.EDU> 94-10-20
 */
struct iattr {
	/* Bitmask of ATTR_* flags saying which of the values below changed. */
	unsigned int	ia_valid;
	umode_t		ia_mode;
	/*
	 * The two anonymous unions wrap structures with the same member.
	 *
	 * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
	 * are a dedicated type requiring the filesystem to use the dedicated
	 * helpers. Other filesystems can continue to use ia_{g,u}id until they
	 * have been ported.
	 *
	 * They always contain the same value. In other words FS_ALLOW_IDMAP
	 * pass down the same value on idmapped mounts as they would on regular
	 * mounts.
	 */
	union {
		kuid_t		ia_uid;
		vfsuid_t	ia_vfsuid;
	};
	union {
		kgid_t		ia_gid;
		vfsgid_t	ia_vfsgid;
	};
	loff_t		ia_size;
	struct timespec64 ia_atime;
	struct timespec64 ia_mtime;
	struct timespec64 ia_ctime;

	/*
	 * Not an attribute, but an auxiliary info for filesystems wanting to
	 * implement an ftruncate() like method.  NOTE: filesystem should
	 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
	 */
	struct file	*ia_file;
};
261  
262  /*
263   * Includes for diskquotas.
264   */
265  #include <linux/quota.h>
266  
/*
 * Maximum number of layers of fs stack.  Needs to be limited to
 * prevent kernel stack overflow.
 */
#define FILESYSTEM_MAX_STACK_DEPTH 2
272  
/**
 * enum positive_aop_returns - aop return codes with specific semantics
 *
 * @AOP_WRITEPAGE_ACTIVATE: Returned by writepage().  Tells the caller that
 *			    page writeback has completed, that the page is
 *			    still locked, and that it should be considered
 *			    active.  The VM uses this hint to return the page
 *			    to the active list -- it won't be a candidate for
 *			    writeback again in the near future.  Other callers
 *			    must be careful to unlock the page if they get
 *			    this return.
 *
 * @AOP_TRUNCATED_PAGE: Returned by read_folio().  The AOP method that was
 *			handed a locked page has unlocked it and the page
 *			might have been truncated.  The caller should back up
 *			to acquiring a new page and try again; if it held a
 *			page reference, it should drop it before retrying.
 *			The aop will be taking reasonable precautions not to
 *			livelock.
 *
 * address_space_operation functions return these large constants to signal
 * special semantics to the caller.  They are much larger than the number of
 * bytes in a page so that they cannot be confused with the return value of
 * functions that report how many bytes of a page were operated on.
 */
enum positive_aop_returns {
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	AOP_TRUNCATED_PAGE,	/* 0x80001: the value right after it */
};
303  
/*
 * oh the beauties of C type declarations.
 *
 * Forward declarations: the types are incomplete here, so the declarations
 * below can only refer to them through pointers.
 */
struct page;
struct address_space;
struct writeback_control;
struct readahead_control;
311  
/*
 * Write life time hint values.
 * Stored in struct inode as u8.
 *
 * WRITE_LIFE_NOT_SET is 0; every other enumerator takes the value of the
 * RWH_WRITE_LIFE_* constant of the same name, which is defined outside this
 * header section.
 */
enum rw_hint {
	WRITE_LIFE_NOT_SET	= 0,
	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
};
324  
/*
 * Match RWF_* bits to IOCB bits
 *
 * NOTE: these five macros expand to a bare "(__force int) RWF_x" cast
 * expression with no enclosing parentheses.
 */
#define IOCB_HIPRI		(__force int) RWF_HIPRI
#define IOCB_DSYNC		(__force int) RWF_DSYNC
#define IOCB_SYNC		(__force int) RWF_SYNC
#define IOCB_NOWAIT		(__force int) RWF_NOWAIT
#define IOCB_APPEND		(__force int) RWF_APPEND

/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD		(1 << 16)
#define IOCB_DIRECT		(1 << 17)
#define IOCB_WRITE		(1 << 18)
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ		(1 << 19)
#define IOCB_NOIO		(1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE	(1 << 21)
/*
 * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
 * iocb completion can be passed back to the owner for execution from a safe
 * context rather than needing to be punted through a workqueue. If this
 * flag is set, the bio completion handling may set iocb->dio_complete to a
 * handler function and iocb->private to context information for that handler.
 * The issuer should call the handler with that context information from task
 * context to complete the processing of the iocb. Note that while this
 * provides a task context for the dio_complete() callback, it should only be
 * used on the completion side for non-IO generating completions. It's fine to
 * call blocking functions from this callback, but they should not wait for
 * unrelated IO (like cache flushing, new IO generation, etc).
 */
#define IOCB_DIO_CALLER_COMP	(1 << 22)

/*
 * for use in trace events
 *
 * Expands to a comma-separated list of { flag, "name" } initialisers, one
 * per IOCB_* flag defined above, with no trailing comma.
 */
#define TRACE_IOCB_STRINGS \
	{ IOCB_HIPRI,		"HIPRI" }, \
	{ IOCB_DSYNC,		"DSYNC" }, \
	{ IOCB_SYNC,		"SYNC" }, \
	{ IOCB_NOWAIT,		"NOWAIT" }, \
	{ IOCB_APPEND,		"APPEND" }, \
	{ IOCB_EVENTFD,		"EVENTFD"}, \
	{ IOCB_DIRECT,		"DIRECT" }, \
	{ IOCB_WRITE,		"WRITE" }, \
	{ IOCB_WAITQ,		"WAITQ" }, \
	{ IOCB_NOIO,		"NOIO" }, \
	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
370  
/*
 * Kernel I/O control block.  Within this header it is the argument of the
 * ->direct_IO() and ->swap_rw() address_space operations and of
 * dio_iodone_t callbacks.
 */
struct kiocb {
	struct file		*ki_filp;
	loff_t			ki_pos;
	/* Completion callback; is_sync_kiocb() treats NULL as "synchronous". */
	void (*ki_complete)(struct kiocb *iocb, long ret);
	void			*private;
	/* NOTE(review): presumably a mask of the IOCB_* bits -- confirm. */
	int			ki_flags;
	u16			ki_ioprio; /* See linux/ioprio.h */
	union {
		/*
		 * Only used for async buffered reads, where it denotes the
		 * page waitqueue associated with completing the read. Valid
		 * IFF IOCB_WAITQ is set.
		 */
		struct wait_page_queue	*ki_waitq;
		/*
		 * Can be used for O_DIRECT IO, where the completion handling
		 * is punted back to the issuer of the IO. May only be set
		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
		 * must then check for presence of this handler when ki_complete
		 * is invoked. The data passed in to this handler must be
		 * assigned to ->private when dio_complete is assigned.
		 */
		ssize_t (*dio_complete)(void *data);
	};
};
396  
is_sync_kiocb(struct kiocb * kiocb)397  static inline bool is_sync_kiocb(struct kiocb *kiocb)
398  {
399  	return kiocb->ki_complete == NULL;
400  }
401  
/*
 * Table of callbacks attached to an address_space through its ->a_ops
 * pointer.  Every member is a function pointer.  Note that ->writepage,
 * ->write_begin, ->write_end and ->error_remove_page still take
 * struct page, while the other page-cache callbacks take struct folio.
 */
struct address_space_operations {
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	int (*read_folio)(struct file *, struct folio *);

	/* Write back some dirty pages from this mapping. */
	int (*writepages)(struct address_space *, struct writeback_control *);

	/* Mark a folio dirty.  Return true if this dirtied it */
	bool (*dirty_folio)(struct address_space *, struct folio *);

	void (*readahead)(struct readahead_control *);

	int (*write_begin)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len,
				struct page **pagep, void **fsdata);
	int (*write_end)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata);

	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
	sector_t (*bmap)(struct address_space *, sector_t);
	void (*invalidate_folio) (struct folio *, size_t offset, size_t len);
	bool (*release_folio)(struct folio *, gfp_t);
	void (*free_folio)(struct folio *folio);
	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
	/*
	 * migrate the contents of a folio to the specified target. If
	 * migrate_mode is MIGRATE_ASYNC, it must not block.
	 */
	int (*migrate_folio)(struct address_space *, struct folio *dst,
			struct folio *src, enum migrate_mode);
	int (*launder_folio)(struct folio *);
	bool (*is_partially_uptodate) (struct folio *, size_t from,
			size_t count);
	void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
	int (*error_remove_page)(struct address_space *, struct page *);

	/* swapfile support */
	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
				sector_t *span);
	void (*swap_deactivate)(struct file *file);
	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
};
445  
/* Constant address_space_operations table, defined outside this header. */
extern const struct address_space_operations empty_aops;
447  
/**
 * struct address_space - Contents of a cacheable, mappable object.
 * @host: Owner, either the inode or the block_device.
 * @i_pages: Cached pages.
 * @invalidate_lock: Guards coherency between page cache contents and
 *   file offset->disk block mappings in the filesystem during invalidates.
 *   It is also used to block modification of page cache contents through
 *   memory mappings.
 * @gfp_mask: Memory allocation flags to use for allocating pages.
 * @i_mmap_writable: Number of VM_SHARED mappings.
 * @nr_thps: Number of THPs in the pagecache (non-shmem only).
 * @i_mmap: Tree of private and shared mappings.
 * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
 * @nrpages: Number of page entries, protected by the i_pages lock.
 * @writeback_index: Writeback starts here.
 * @a_ops: Methods.
 * @flags: Error bits and flags (AS_*).
 * @wb_err: The most recent error which has occurred.
 * @private_lock: For use by the owner of the address_space.
 * @private_list: For use by the owner of the address_space.
 * @private_data: For use by the owner of the address_space.
 *
 * @nr_thps only exists when CONFIG_READ_ONLY_THP_FOR_FS is enabled.  The
 * structure is forced to at least sizeof(long) alignment; the comment
 * following the definition gives the reason.
 */
struct address_space {
	struct inode		*host;
	struct xarray		i_pages;
	struct rw_semaphore	invalidate_lock;
	gfp_t			gfp_mask;
	atomic_t		i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	/* number of thp, only for non-shmem files */
	atomic_t		nr_thps;
#endif
	struct rb_root_cached	i_mmap;
	unsigned long		nrpages;
	pgoff_t			writeback_index;
	const struct address_space_operations *a_ops;
	unsigned long		flags;
	struct rw_semaphore	i_mmap_rwsem;
	errseq_t		wb_err;
	spinlock_t		private_lock;
	struct list_head	private_list;
	void			*private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
	/*
	 * On most architectures that alignment is already the case; but
	 * must be enforced here for CRIS, to let the least significant bit
	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
	 */
496  
/*
 * XArray tags, for tagging dirty and writeback pages in the pagecache.
 * Each tag is one of the three XA_MARK_n marks; mapping_tagged() tests them
 * on a mapping's i_pages.
 */
#define PAGECACHE_TAG_DIRTY	XA_MARK_0
#define PAGECACHE_TAG_WRITEBACK	XA_MARK_1
#define PAGECACHE_TAG_TOWRITE	XA_MARK_2
501  
502  /*
503   * Returns true if any of the pages in the mapping are marked with the tag.
504   */
mapping_tagged(struct address_space * mapping,xa_mark_t tag)505  static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
506  {
507  	return xa_marked(&mapping->i_pages, tag);
508  }
509  
i_mmap_lock_write(struct address_space * mapping)510  static inline void i_mmap_lock_write(struct address_space *mapping)
511  {
512  	down_write(&mapping->i_mmap_rwsem);
513  }
514  
i_mmap_trylock_write(struct address_space * mapping)515  static inline int i_mmap_trylock_write(struct address_space *mapping)
516  {
517  	return down_write_trylock(&mapping->i_mmap_rwsem);
518  }
519  
i_mmap_unlock_write(struct address_space * mapping)520  static inline void i_mmap_unlock_write(struct address_space *mapping)
521  {
522  	up_write(&mapping->i_mmap_rwsem);
523  }
524  
i_mmap_trylock_read(struct address_space * mapping)525  static inline int i_mmap_trylock_read(struct address_space *mapping)
526  {
527  	return down_read_trylock(&mapping->i_mmap_rwsem);
528  }
529  
i_mmap_lock_read(struct address_space * mapping)530  static inline void i_mmap_lock_read(struct address_space *mapping)
531  {
532  	down_read(&mapping->i_mmap_rwsem);
533  }
534  
i_mmap_unlock_read(struct address_space * mapping)535  static inline void i_mmap_unlock_read(struct address_space *mapping)
536  {
537  	up_read(&mapping->i_mmap_rwsem);
538  }
539  
i_mmap_assert_locked(struct address_space * mapping)540  static inline void i_mmap_assert_locked(struct address_space *mapping)
541  {
542  	lockdep_assert_held(&mapping->i_mmap_rwsem);
543  }
544  
i_mmap_assert_write_locked(struct address_space * mapping)545  static inline void i_mmap_assert_write_locked(struct address_space *mapping)
546  {
547  	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
548  }
549  
550  /*
551   * Might pages of this file be mapped into userspace?
552   */
mapping_mapped(struct address_space * mapping)553  static inline int mapping_mapped(struct address_space *mapping)
554  {
555  	return	!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
556  }
557  
558  /*
559   * Might pages of this file have been modified in userspace?
560   * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
561   * marks vma as VM_SHARED if it is shared, and the file was opened for
562   * writing i.e. vma may be mprotected writable even if now readonly.
563   *
564   * If i_mmap_writable is negative, no new writable mappings are allowed. You
565   * can only deny writable mappings, if none exists right now.
566   */
mapping_writably_mapped(struct address_space * mapping)567  static inline int mapping_writably_mapped(struct address_space *mapping)
568  {
569  	return atomic_read(&mapping->i_mmap_writable) > 0;
570  }
571  
mapping_map_writable(struct address_space * mapping)572  static inline int mapping_map_writable(struct address_space *mapping)
573  {
574  	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
575  		0 : -EPERM;
576  }
577  
mapping_unmap_writable(struct address_space * mapping)578  static inline void mapping_unmap_writable(struct address_space *mapping)
579  {
580  	atomic_dec(&mapping->i_mmap_writable);
581  }
582  
mapping_deny_writable(struct address_space * mapping)583  static inline int mapping_deny_writable(struct address_space *mapping)
584  {
585  	return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
586  		0 : -EBUSY;
587  }
588  
mapping_allow_writable(struct address_space * mapping)589  static inline void mapping_allow_writable(struct address_space *mapping)
590  {
591  	atomic_inc(&mapping->i_mmap_writable);
592  }
593  
/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 *
 * On 32-bit SMP builds this defines __NEED_I_SIZE_ORDERED (which adds
 * i_size_seqcount to struct inode) and makes i_size_ordered_init()
 * initialise that seqcount.  On every other configuration
 * i_size_ordered_init() expands to an empty statement.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif
604  
struct posix_acl;
/*
 * Sentinel pointer values, not real ACL objects.  Both have their least
 * significant bit set, so is_uncached_acl() reports true for them.
 */
#define ACL_NOT_CACHED ((void *)(-1))
/*
 * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
 * cache the ACL.  This also means that ->get_inode_acl() can be called in RCU
 * mode with the LOOKUP_RCU flag.
 */
#define ACL_DONT_CACHE ((void *)(-3))
613  
/*
 * Build a per-task sentinel ACL pointer: the address of @task advanced by
 * one byte.  The result is a tag value, not a pointer to a real posix_acl.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
	char *tagged = (char *)task + 1;

	return (struct posix_acl *)tagged;
}
619  
/*
 * True when @acl has its least significant bit set, i.e. when it is a
 * sentinel value and not a pointer to a real posix_acl.
 */
static inline bool
is_uncached_acl(struct posix_acl *acl)
{
	unsigned long bits = (unsigned long)acl;

	return bits & 1UL;
}
625  
/*
 * IOP_* bit flags; each value is a distinct single bit.
 * NOTE(review): presumably stored in inode->i_opflags -- confirm at the users.
 */
#define IOP_FASTPERM	0x0001
#define IOP_LOOKUP	0x0002
#define IOP_NOFOLLOW	0x0004
#define IOP_XATTR	0x0008
#define IOP_DEFAULT_READLINK	0x0010

struct fsnotify_mark_connector;
633  
/*
 * Keep mostly read-only and often accessed (especially for
 * the RCU path lookup and 'stat' data) fields at the beginning
 * of the 'struct inode'
 *
 * Several members only exist under a config option (CONFIG_FS_POSIX_ACL,
 * CONFIG_SECURITY, CONFIG_CGROUP_WRITEBACK, CONFIG_IMA/CONFIG_FILE_LOCKING,
 * CONFIG_FSNOTIFY, CONFIG_FS_ENCRYPTION, CONFIG_FS_VERITY) or when
 * __NEED_I_SIZE_ORDERED is defined.
 */
struct inode {
	umode_t			i_mode;
	unsigned short		i_opflags;
	kuid_t			i_uid;
	kgid_t			i_gid;
	unsigned int		i_flags;

#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

	const struct inode_operations	*i_op;
	struct super_block	*i_sb;
	struct address_space	*i_mapping;

#ifdef CONFIG_SECURITY
	void			*i_security;
#endif

	/* Stat data, not accessed from path walking */
	unsigned long		i_ino;
	/*
	 * Filesystems may only read i_nlink directly.  They shall use the
	 * following functions for modification:
	 *
	 *    (set|clear|inc|drop)_nlink
	 *    inode_(inc|dec)_link_count
	 *
	 * The union exposes the same storage twice: i_nlink is const so
	 * direct writes fail to compile, __i_nlink is the writable alias.
	 */
	union {
		const unsigned int i_nlink;
		unsigned int __i_nlink;
	};
	dev_t			i_rdev;
	/* Read and write through i_size_read() / i_size_write(). */
	loff_t			i_size;
	struct timespec64	i_atime;
	struct timespec64	i_mtime;
	struct timespec64	__i_ctime; /* use inode_*_ctime accessors! */
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	unsigned short          i_bytes;
	/* log2 of the block size: i_blocksize() returns 1 << i_blkbits */
	u8			i_blkbits;
	u8			i_write_hint;
	blkcnt_t		i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
	/* Sequence counter used by i_size_read()/i_size_write(). */
	seqcount_t		i_size_seqcount;
#endif

	/* Misc */
	unsigned long		i_state;
	/* Taken by the inode_lock*() helpers. */
	struct rw_semaphore	i_rwsem;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_io_list;	/* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */

	/* foreign inode detection, see wbc_detach_inode() */
	int			i_wb_frn_winner;
	u16			i_wb_frn_avg_time;
	u16			i_wb_frn_history;
#endif
	struct list_head	i_lru;		/* inode LRU list */
	struct list_head	i_sb_list;
	struct list_head	i_wb_list;	/* backing dev writeback list */
	/* i_dentry and i_rcu share storage. */
	union {
		struct hlist_head	i_dentry;
		struct rcu_head		i_rcu;
	};
	atomic64_t		i_version;
	atomic64_t		i_sequence; /* see futex */
	atomic_t		i_count;
	atomic_t		i_dio_count;
	atomic_t		i_writecount;
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
	atomic_t		i_readcount; /* struct files open RO */
#endif
	/* i_fop and free_inode share storage. */
	union {
		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
		void (*free_inode)(struct inode *);
	};
	struct file_lock_context	*i_flctx;
	/* Embedded address_space (i_mapping above is a separate pointer). */
	struct address_space	i_data;
	struct list_head	i_devices;
	/* Only one of these four members is meaningful at a time. */
	union {
		struct pipe_inode_info	*i_pipe;
		struct cdev		*i_cdev;
		char			*i_link;
		unsigned		i_dir_seq;
	};

	__u32			i_generation;

#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
#endif

#ifdef CONFIG_FS_ENCRYPTION
	struct fscrypt_info	*i_crypt_info;
#endif

#ifdef CONFIG_FS_VERITY
	struct fsverity_info	*i_verity_info;
#endif

	void			*i_private; /* fs or device private pointer */
} __randomize_layout;
750  
/*
 * Defined outside this header.  Takes a timestamp by value plus an inode and
 * returns a timestamp by value.
 * NOTE(review): presumably truncates @t to the timestamp granularity that
 * applies to @inode -- confirm at the definition.
 */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
752  
i_blocksize(const struct inode * node)753  static inline unsigned int i_blocksize(const struct inode *node)
754  {
755  	return (1 << node->i_blkbits);
756  }
757  
inode_unhashed(struct inode * inode)758  static inline int inode_unhashed(struct inode *inode)
759  {
760  	return hlist_unhashed(&inode->i_hash);
761  }
762  
763  /*
764   * __mark_inode_dirty expects inodes to be hashed.  Since we don't
765   * want special inodes in the fileset inode space, we make them
766   * appear hashed, but do not put on any lists.  hlist_del()
767   * will work fine and require no locking.
768   */
inode_fake_hash(struct inode * inode)769  static inline void inode_fake_hash(struct inode *inode)
770  {
771  	hlist_add_fake(&inode->i_hash);
772  }
773  
/*
 * inode->i_mutex nesting subclasses for the lock validator.  The numeric
 * value of each subclass is spelled out next to its enumerator:
 *
 *   I_MUTEX_NORMAL  (0): the object of the current VFS operation
 *   I_MUTEX_PARENT  (1): parent
 *   I_MUTEX_CHILD   (2): child/target
 *   I_MUTEX_XATTR   (3): xattr
 *   I_MUTEX_NONDIR2 (4): second non-directory
 *   I_MUTEX_PARENT2 (5): second parent (when locking independent
 *                        directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
 *
 * The locking order between these classes is
 * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
 */
enum inode_i_mutex_lock_class {
	I_MUTEX_NORMAL	= 0,
	I_MUTEX_PARENT	= 1,
	I_MUTEX_CHILD	= 2,
	I_MUTEX_XATTR	= 3,
	I_MUTEX_NONDIR2	= 4,
	I_MUTEX_PARENT2	= 5,
};
799  
inode_lock(struct inode * inode)800  static inline void inode_lock(struct inode *inode)
801  {
802  	down_write(&inode->i_rwsem);
803  }
804  
inode_unlock(struct inode * inode)805  static inline void inode_unlock(struct inode *inode)
806  {
807  	up_write(&inode->i_rwsem);
808  }
809  
inode_lock_shared(struct inode * inode)810  static inline void inode_lock_shared(struct inode *inode)
811  {
812  	down_read(&inode->i_rwsem);
813  }
814  
inode_unlock_shared(struct inode * inode)815  static inline void inode_unlock_shared(struct inode *inode)
816  {
817  	up_read(&inode->i_rwsem);
818  }
819  
inode_trylock(struct inode * inode)820  static inline int inode_trylock(struct inode *inode)
821  {
822  	return down_write_trylock(&inode->i_rwsem);
823  }
824  
inode_trylock_shared(struct inode * inode)825  static inline int inode_trylock_shared(struct inode *inode)
826  {
827  	return down_read_trylock(&inode->i_rwsem);
828  }
829  
inode_is_locked(struct inode * inode)830  static inline int inode_is_locked(struct inode *inode)
831  {
832  	return rwsem_is_locked(&inode->i_rwsem);
833  }
834  
inode_lock_nested(struct inode * inode,unsigned subclass)835  static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
836  {
837  	down_write_nested(&inode->i_rwsem, subclass);
838  }
839  
inode_lock_shared_nested(struct inode * inode,unsigned subclass)840  static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass)
841  {
842  	down_read_nested(&inode->i_rwsem, subclass);
843  }
844  
filemap_invalidate_lock(struct address_space * mapping)845  static inline void filemap_invalidate_lock(struct address_space *mapping)
846  {
847  	down_write(&mapping->invalidate_lock);
848  }
849  
filemap_invalidate_unlock(struct address_space * mapping)850  static inline void filemap_invalidate_unlock(struct address_space *mapping)
851  {
852  	up_write(&mapping->invalidate_lock);
853  }
854  
filemap_invalidate_lock_shared(struct address_space * mapping)855  static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
856  {
857  	down_read(&mapping->invalidate_lock);
858  }
859  
filemap_invalidate_trylock_shared(struct address_space * mapping)860  static inline int filemap_invalidate_trylock_shared(
861  					struct address_space *mapping)
862  {
863  	return down_read_trylock(&mapping->invalidate_lock);
864  }
865  
filemap_invalidate_unlock_shared(struct address_space * mapping)866  static inline void filemap_invalidate_unlock_shared(
867  					struct address_space *mapping)
868  {
869  	up_read(&mapping->invalidate_lock);
870  }
871  
/*
 * Two-object lock/unlock helpers, defined outside this header.
 * NOTE(review): judging by the names, each pair locks or unlocks both of its
 * arguments together; the ordering rules and whether NULL or identical
 * arguments are accepted must be checked at the definitions.
 */
void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);

void filemap_invalidate_lock_two(struct address_space *mapping1,
				 struct address_space *mapping2);
void filemap_invalidate_unlock_two(struct address_space *mapping1,
				   struct address_space *mapping2);
879  
880  
/*
 * i_size_read - return @inode->i_size, read consistently.
 *
 * NOTE: in a 32bit arch with a preemptable kernel and
 * an UP compile the i_size_read/write must be atomic
 * with respect to the local cpu (unlike with preempt disabled),
 * but they don't need to be atomic with respect to other cpus like in
 * true SMP (so they need to either locally disable irq around
 * the read or for example on x86 they can be still implemented as a
 * cmpxchg8b without the need of the lock prefix). For SMP compiles
 * and 64bit archs it makes no difference if preempt is enabled or not.
 */
static inline loff_t i_size_read(const struct inode *inode)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	loff_t i_size;
	unsigned int seq;

	/*
	 * 32-bit SMP: sample i_size inside a seqcount read section and
	 * retry for as long as read_seqcount_retry() reports a change.
	 */
	do {
		seq = read_seqcount_begin(&inode->i_size_seqcount);
		i_size = inode->i_size;
	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
	return i_size;
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	loff_t i_size;

	/* 32-bit UP with preemption: read with preemption disabled. */
	preempt_disable();
	i_size = inode->i_size;
	preempt_enable();
	return i_size;
#else
	/* All other configurations: a plain load. */
	return inode->i_size;
#endif
}
913  
914  /*
915   * NOTE: unlike i_size_read(), i_size_write() does need locking around it
916   * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
917   * can be lost, resulting in subsequent i_size_read() calls spinning forever.
918   */
i_size_write(struct inode * inode,loff_t i_size)919  static inline void i_size_write(struct inode *inode, loff_t i_size)
920  {
921  #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
922  	preempt_disable();
923  	write_seqcount_begin(&inode->i_size_seqcount);
924  	inode->i_size = i_size;
925  	write_seqcount_end(&inode->i_size_seqcount);
926  	preempt_enable();
927  #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
928  	preempt_disable();
929  	inode->i_size = i_size;
930  	preempt_enable();
931  #else
932  	inode->i_size = i_size;
933  #endif
934  }
935  
iminor(const struct inode * inode)936  static inline unsigned iminor(const struct inode *inode)
937  {
938  	return MINOR(inode->i_rdev);
939  }
940  
imajor(const struct inode * inode)941  static inline unsigned imajor(const struct inode *inode)
942  {
943  	return MAJOR(inode->i_rdev);
944  }
945  
/*
 * Signal-delivery owner of a file: who receives SIGIO (or the rt signal in
 * @signum) and with which credentials the owner was set. Embedded in
 * struct file as f_owner.
 */
struct fown_struct {
	rwlock_t lock;          /* protects pid, uid, euid fields */
	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
	kuid_t uid, euid;	/* uid/euid of process setting the owner */
	int signum;		/* posix.1b rt signal to be delivered on IO */
};
953  
/**
 * struct file_ra_state - Track a file's readahead state.
 * @start: Where the most recent readahead started.
 * @size: Number of pages read in the most recent readahead.
 * @async_size: Number of pages that were/are not needed immediately
 *      and so were/are genuinely "ahead".  Start next readahead when
 *      the first of these pages is accessed.
 * @ra_pages: Maximum size of a readahead request, copied from the bdi.
 * @mmap_miss: How many mmap accesses missed in the page cache.
 * @prev_pos: The last byte in the most recent read request.
 *
 * When this structure is passed to ->readahead(), the "most recent"
 * readahead means the current readahead.
 */
struct file_ra_state {
	pgoff_t start;
	unsigned int size;
	unsigned int async_size;
	unsigned int ra_pages;
	unsigned int mmap_miss;
	loff_t prev_pos;
};
976  
977  /*
978   * Check if @index falls in the readahead windows.
979   */
ra_has_index(struct file_ra_state * ra,pgoff_t index)980  static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
981  {
982  	return (index >= ra->start &&
983  		index <  ra->start + ra->size);
984  }
985  
/*
 * f_{lock,count,pos_lock} members can be highly contended and share
 * the same cacheline. f_{lock,mode} are very frequently used together
 * and so share the same cacheline as well. The read-mostly
 * f_{path,inode,op} are kept on a separate cacheline.
 *
 * The layout is subject to __randomize_layout, and the type is forced to
 * at least 4-byte alignment (see the attribute at the end).
 */
struct file {
	/* These three members overlay each other; only one is valid at a time. */
	union {
		struct llist_node	f_llist;
		struct rcu_head 	f_rcuhead;
		unsigned int 		f_iocb_flags;
	};

	/*
	 * Protects f_ep, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t		f_lock;
	fmode_t			f_mode;
	atomic_long_t		f_count;	/* see get_file()/file_count() */
	struct mutex		f_pos_lock;
	loff_t			f_pos;
	unsigned int		f_flags;
	struct fown_struct	f_owner;
	const struct cred	*f_cred;
	struct file_ra_state	f_ra;
	struct path		f_path;
	struct inode		*f_inode;	/* cached value */
	const struct file_operations	*f_op;

	u64			f_version;
#ifdef CONFIG_SECURITY
	void			*f_security;
#endif
	/* needed for tty driver, and maybe others */
	void			*private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct hlist_head	*f_ep;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space	*f_mapping;
	errseq_t		f_wb_err;
	errseq_t		f_sb_err; /* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
1032  
/*
 * Variable-length file handle: a fixed header followed by the opaque
 * identifier bytes in the flexible array @f_handle.
 * NOTE(review): handle_bytes presumably gives the length of f_handle[] -
 * confirm against the users of this structure.
 */
struct file_handle {
	__u32 handle_bytes;
	int handle_type;
	/* file identifier */
	unsigned char f_handle[];
};
1039  
get_file(struct file * f)1040  static inline struct file *get_file(struct file *f)
1041  {
1042  	atomic_long_inc(&f->f_count);
1043  	return f;
1044  }
/* Increment (x)->f_count via atomic_long_inc_not_zero(); yields its result. */
#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
/* Current value of (x)->f_count. */
#define file_count(x)	atomic_long_read(&(x)->f_count)
1047  
/* 2^31 - 1 */
#define	MAX_NON_LFS	((1UL<<31) - 1)

/* Page cache limit. The filesystems should put that into their s_maxbytes
   limits, otherwise bad things can happen in VM. */
/* Note: MAX_LFS_FILESIZE is only defined for BITS_PER_LONG of 32 or 64. */
#if BITS_PER_LONG==32
#define MAX_LFS_FILESIZE	((loff_t)ULONG_MAX << PAGE_SHIFT)
#elif BITS_PER_LONG==64
#define MAX_LFS_FILESIZE 	((loff_t)LLONG_MAX)
#endif
1057  
/* legacy typedef, should eventually be removed */
typedef void *fl_owner_t;

/* Forward declaration only; the structure is not defined in this section. */
struct file_lock;
1062  
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define OFFSET_MAX	type_max(loff_t)	/* largest loff_t value */
#define OFFT_OFFSET_MAX	type_max(off_t)		/* largest off_t value */
#endif

/* NOTE(review): presumably signals the owner recorded in @fown - confirm. */
extern void send_sigio(struct fown_struct *fown, int fd, int band);
1070  
file_inode(const struct file * f)1071  static inline struct inode *file_inode(const struct file *f)
1072  {
1073  	return f->f_inode;
1074  }
1075  
file_dentry(const struct file * file)1076  static inline struct dentry *file_dentry(const struct file *file)
1077  {
1078  	return d_real(file->f_path.dentry, file_inode(file));
1079  }
1080  
/*
 * One entry of a singly linked fasync list (see fasync_helper() and
 * kill_fasync() declared in this header).
 * NOTE(review): @magic is presumably checked against FASYNC_MAGIC and
 * @fa_rcu used for deferred freeing - confirm in the implementation.
 */
struct fasync_struct {
	rwlock_t		fa_lock;
	int			magic;
	int			fa_fd;
	struct fasync_struct	*fa_next; /* singly linked list */
	struct file		*fa_file;
	struct rcu_head		fa_rcu;
};
1089  
#define FASYNC_MAGIC 0x4601

/* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
extern int fasync_remove_entry(struct file *, struct fasync_struct **);
extern struct fasync_struct *fasync_alloc(void);
extern void fasync_free(struct fasync_struct *);

/* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int);

/*
 * File owner (struct fown_struct) management.
 * NOTE(review): exact semantics of @force and @who are defined by the
 * implementations, which are not visible here.
 */
extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);
1107  
/*
 * sb->s_flags.  Note that these mirror the equivalent MS_* flags where
 * represented in both.
 *
 * The bit positions are not contiguous, and the kernel-internal flags
 * further down use other bit positions of the same numbering.
 */
#define SB_RDONLY       BIT(0)	/* Mount read-only */
#define SB_NOSUID       BIT(1)	/* Ignore suid and sgid bits */
#define SB_NODEV        BIT(2)	/* Disallow access to device special files */
#define SB_NOEXEC       BIT(3)	/* Disallow program execution */
#define SB_SYNCHRONOUS  BIT(4)	/* Writes are synced at once */
#define SB_MANDLOCK     BIT(6)	/* Allow mandatory locks on an FS */
#define SB_DIRSYNC      BIT(7)	/* Directory modifications are synchronous */
#define SB_NOATIME      BIT(10)	/* Do not update access times. */
#define SB_NODIRATIME   BIT(11)	/* Do not update directory access times */
#define SB_SILENT       BIT(15)
#define SB_POSIXACL     BIT(16)	/* VFS does not apply the umask */
#define SB_INLINECRYPT  BIT(17)	/* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT    BIT(22)	/* this is a kern_mount call */
#define SB_I_VERSION    BIT(23)	/* Update inode I_version field */
#define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */

/* These sb flags are internal to the kernel */
#define SB_DEAD         BIT(21)
#define SB_DYING        BIT(24)
#define SB_SUBMOUNT     BIT(26)
#define SB_FORCE        BIT(27)
#define SB_NOSEC        BIT(28)
#define SB_BORN         BIT(29)
#define SB_ACTIVE       BIT(30)
#define SB_NOUSER       BIT(31)
1137  
/* These flags relate to encoding and casefolding */
#define SB_ENC_STRICT_MODE_FL	(1 << 0)

/*
 * Evaluates to non-zero when SB_ENC_STRICT_MODE_FL is set in
 * @sb->s_encoding_flags, zero otherwise.
 *
 * @sb is parenthesized so that any pointer-valued expression (for example
 * "base + 1" or a conditional expression) can be passed safely.
 */
#define sb_has_strict_encoding(sb) \
	((sb)->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
1143  
/*
 *	Umount options
 */

#define MNT_FORCE	0x00000001	/* Attempt to forcibly umount */
#define MNT_DETACH	0x00000002	/* Just detach from the tree */
#define MNT_EXPIRE	0x00000004	/* Mark for expiry */
#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
1153  
/* sb->s_iflags: each SB_I_* value below is a distinct single bit */
#define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
#define SB_I_NOEXEC	0x00000002	/* Ignore executables on this fs */
#define SB_I_NODEV	0x00000004	/* Ignore devices on this fs */
#define SB_I_STABLE_WRITES 0x00000008	/* don't modify blks until WB is done */

/* sb->s_iflags to limit user namespace mounts */
#define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
#define SB_I_UNTRUSTED_MOUNTER		0x00000040

#define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
#define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
1169  
/* Possible states of 'frozen' field */
enum {
	SB_UNFROZEN = 0,		/* FS is unfrozen */
	SB_FREEZE_WRITE	= 1,		/* Writes, dir ops, ioctls frozen */
	SB_FREEZE_PAGEFAULT = 2,	/* Page faults stopped as well */
	SB_FREEZE_FS = 3,		/* For internal FS use (e.g. to stop
					 * internal threads if needed) */
	SB_FREEZE_COMPLETE = 4,		/* ->freeze_fs finished successfully */
};

/*
 * Number of freeze levels that own a semaphore in struct sb_writers
 * (SB_FREEZE_WRITE .. SB_FREEZE_FS); the level-to-index mapping is
 * "level - 1".
 */
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1181  
/*
 * Per-superblock freeze state: the current freeze level plus one percpu
 * rw semaphore per freeze level, indexed by "level - 1".
 */
struct sb_writers {
	unsigned short			frozen;		/* Is sb frozen? */
	unsigned short			freeze_holders;	/* Who froze fs? */
	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
};
1187  
/*
 * Per-superblock state. The layout is subject to __randomize_layout, and
 * s_inode_list_lock starts a new cacheline on SMP builds.
 */
struct super_block {
	struct list_head	s_list;		/* Keep this first */
	dev_t			s_dev;		/* search index; _not_ kdev_t */
	unsigned char		s_blocksize_bits;
	unsigned long		s_blocksize;
	loff_t			s_maxbytes;	/* Max file size */
	struct file_system_type	*s_type;
	const struct super_operations	*s_op;
	const struct dquot_operations	*dq_op;
	const struct quotactl_ops	*s_qcop;
	const struct export_operations *s_export_op;
	unsigned long		s_flags;	/* SB_* flags */
	unsigned long		s_iflags;	/* internal SB_I_* flags */
	unsigned long		s_magic;
	struct dentry		*s_root;
	struct rw_semaphore	s_umount;
	int			s_count;
	atomic_t		s_active;
#ifdef CONFIG_SECURITY
	void                    *s_security;
#endif
	const struct xattr_handler **s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
	const struct fscrypt_operations	*s_cop;
	struct fscrypt_keyring	*s_master_keys; /* master crypto keys in use */
#endif
#ifdef CONFIG_FS_VERITY
	const struct fsverity_operations *s_vop;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
	struct unicode_map *s_encoding;
	__u16 s_encoding_flags;		/* SB_ENC_* flags */
#endif
	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
	struct block_device	*s_bdev;
	struct backing_dev_info *s_bdi;
	struct mtd_info		*s_mtd;
	struct hlist_node	s_instances;
	unsigned int		s_quota_types;	/* Bitmask of supported quota types */
	struct quota_info	s_dquot;	/* Diskquota specific options */

	/* Freeze state, see struct sb_writers and sb_start_write() */
	struct sb_writers	s_writers;

	/*
	 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
	 * s_fsnotify_marks together for cache efficiency. They are frequently
	 * accessed and rarely modified.
	 */
	void			*s_fs_info;	/* Filesystem private info */

	/* Granularity of c/m/atime in ns (cannot be worse than a second) */
	u32			s_time_gran;
	/* Time limits for c/m/atime in seconds */
	time64_t		   s_time_min;
	time64_t		   s_time_max;
#ifdef CONFIG_FSNOTIFY
	__u32			s_fsnotify_mask;
	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
#endif

	char			s_id[32];	/* Informational name */
	uuid_t			s_uuid;		/* UUID */

	unsigned int		s_max_links;

	/*
	 * The next field is for VFS *only*. No filesystems have any business
	 * even looking at it. You had been warned.
	 */
	struct mutex s_vfs_rename_mutex;	/* Kludge */

	/*
	 * Filesystem subtype.  If non-empty the filesystem type field
	 * in /proc/mounts will be "type.subtype"
	 */
	const char *s_subtype;

	const struct dentry_operations *s_d_op; /* default d_op for dentries */

	struct shrinker s_shrink;	/* per-sb shrinker handle */

	/* Number of inodes with nlink == 0 but still referenced */
	atomic_long_t s_remove_count;

	/*
	 * Number of inode/mount/sb objects that are being watched, note that
	 * inodes objects are currently double-accounted.
	 */
	atomic_long_t s_fsnotify_connectors;

	/* Read-only state of the superblock is being changed */
	int s_readonly_remount;

	/* per-sb errseq_t for reporting writeback errors via syncfs */
	errseq_t s_wb_err;

	/* AIO completions deferred from interrupt context */
	struct workqueue_struct *s_dio_done_wq;
	struct hlist_head s_pins;

	/*
	 * Owning user namespace and default context in which to
	 * interpret filesystem uids, gids, quotas, device nodes,
	 * xattrs and security labels.
	 */
	struct user_namespace *s_user_ns;

	/*
	 * The list_lru structure is essentially just a pointer to a table
	 * of per-node lru lists, each of which has its own spinlock.
	 * There is no need to put them into separate cachelines.
	 */
	struct list_lru		s_dentry_lru;
	struct list_lru		s_inode_lru;
	struct rcu_head		rcu;
	struct work_struct	destroy_work;

	struct mutex		s_sync_lock;	/* sync serialisation lock */

	/*
	 * Indicates how deep in a filesystem stack this SB is
	 */
	int s_stack_depth;

	/* s_inode_list_lock protects s_inodes */
	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
	struct list_head	s_inodes;	/* all inodes */

	spinlock_t		s_inode_wblist_lock;
	struct list_head	s_inodes_wb;	/* writeback inodes */
} __randomize_layout;
1320  
i_user_ns(const struct inode * inode)1321  static inline struct user_namespace *i_user_ns(const struct inode *inode)
1322  {
1323  	return inode->i_sb->s_user_ns;
1324  }
1325  
1326  /* Helper functions so that in most cases filesystems will
1327   * not need to deal directly with kuid_t and kgid_t and can
1328   * instead deal with the raw numeric values that are stored
1329   * in the filesystem.
1330   */
i_uid_read(const struct inode * inode)1331  static inline uid_t i_uid_read(const struct inode *inode)
1332  {
1333  	return from_kuid(i_user_ns(inode), inode->i_uid);
1334  }
1335  
i_gid_read(const struct inode * inode)1336  static inline gid_t i_gid_read(const struct inode *inode)
1337  {
1338  	return from_kgid(i_user_ns(inode), inode->i_gid);
1339  }
1340  
/* Store raw @uid into @inode->i_uid as a kuid built in i_user_ns(). */
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
	struct user_namespace *ns = i_user_ns(inode);

	inode->i_uid = make_kuid(ns, uid);
}
1345  
/* Store raw @gid into @inode->i_gid as a kgid built in i_user_ns(). */
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
	struct user_namespace *ns = i_user_ns(inode);

	inode->i_gid = make_kgid(ns, gid);
}
1350  
1351  /**
1352   * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping
1353   * @idmap: idmap of the mount the inode was found from
1354   * @inode: inode to map
1355   *
1356   * Return: whe inode's i_uid mapped down according to @idmap.
1357   * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
1358   */
i_uid_into_vfsuid(struct mnt_idmap * idmap,const struct inode * inode)1359  static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
1360  					 const struct inode *inode)
1361  {
1362  	return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid);
1363  }
1364  
1365  /**
1366   * i_uid_needs_update - check whether inode's i_uid needs to be updated
1367   * @idmap: idmap of the mount the inode was found from
1368   * @attr: the new attributes of @inode
1369   * @inode: the inode to update
1370   *
1371   * Check whether the $inode's i_uid field needs to be updated taking idmapped
1372   * mounts into account if the filesystem supports it.
1373   *
1374   * Return: true if @inode's i_uid field needs to be updated, false if not.
1375   */
i_uid_needs_update(struct mnt_idmap * idmap,const struct iattr * attr,const struct inode * inode)1376  static inline bool i_uid_needs_update(struct mnt_idmap *idmap,
1377  				      const struct iattr *attr,
1378  				      const struct inode *inode)
1379  {
1380  	return ((attr->ia_valid & ATTR_UID) &&
1381  		!vfsuid_eq(attr->ia_vfsuid,
1382  			   i_uid_into_vfsuid(idmap, inode)));
1383  }
1384  
1385  /**
1386   * i_uid_update - update @inode's i_uid field
1387   * @idmap: idmap of the mount the inode was found from
1388   * @attr: the new attributes of @inode
1389   * @inode: the inode to update
1390   *
1391   * Safely update @inode's i_uid field translating the vfsuid of any idmapped
1392   * mount into the filesystem kuid.
1393   */
i_uid_update(struct mnt_idmap * idmap,const struct iattr * attr,struct inode * inode)1394  static inline void i_uid_update(struct mnt_idmap *idmap,
1395  				const struct iattr *attr,
1396  				struct inode *inode)
1397  {
1398  	if (attr->ia_valid & ATTR_UID)
1399  		inode->i_uid = from_vfsuid(idmap, i_user_ns(inode),
1400  					   attr->ia_vfsuid);
1401  }
1402  
1403  /**
1404   * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping
1405   * @idmap: idmap of the mount the inode was found from
1406   * @inode: inode to map
1407   *
1408   * Return: the inode's i_gid mapped down according to @idmap.
1409   * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
1410   */
i_gid_into_vfsgid(struct mnt_idmap * idmap,const struct inode * inode)1411  static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap,
1412  					 const struct inode *inode)
1413  {
1414  	return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid);
1415  }
1416  
1417  /**
1418   * i_gid_needs_update - check whether inode's i_gid needs to be updated
1419   * @idmap: idmap of the mount the inode was found from
1420   * @attr: the new attributes of @inode
1421   * @inode: the inode to update
1422   *
1423   * Check whether the $inode's i_gid field needs to be updated taking idmapped
1424   * mounts into account if the filesystem supports it.
1425   *
1426   * Return: true if @inode's i_gid field needs to be updated, false if not.
1427   */
i_gid_needs_update(struct mnt_idmap * idmap,const struct iattr * attr,const struct inode * inode)1428  static inline bool i_gid_needs_update(struct mnt_idmap *idmap,
1429  				      const struct iattr *attr,
1430  				      const struct inode *inode)
1431  {
1432  	return ((attr->ia_valid & ATTR_GID) &&
1433  		!vfsgid_eq(attr->ia_vfsgid,
1434  			   i_gid_into_vfsgid(idmap, inode)));
1435  }
1436  
1437  /**
1438   * i_gid_update - update @inode's i_gid field
1439   * @idmap: idmap of the mount the inode was found from
1440   * @attr: the new attributes of @inode
1441   * @inode: the inode to update
1442   *
1443   * Safely update @inode's i_gid field translating the vfsgid of any idmapped
1444   * mount into the filesystem kgid.
1445   */
i_gid_update(struct mnt_idmap * idmap,const struct iattr * attr,struct inode * inode)1446  static inline void i_gid_update(struct mnt_idmap *idmap,
1447  				const struct iattr *attr,
1448  				struct inode *inode)
1449  {
1450  	if (attr->ia_valid & ATTR_GID)
1451  		inode->i_gid = from_vfsgid(idmap, i_user_ns(inode),
1452  					   attr->ia_vfsgid);
1453  }
1454  
1455  /**
1456   * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
1457   * @inode: inode to initialize
1458   * @idmap: idmap of the mount the inode was found from
1459   *
1460   * Initialize the i_uid field of @inode. If the inode was found/created via
1461   * an idmapped mount map the caller's fsuid according to @idmap.
1462   */
inode_fsuid_set(struct inode * inode,struct mnt_idmap * idmap)1463  static inline void inode_fsuid_set(struct inode *inode,
1464  				   struct mnt_idmap *idmap)
1465  {
1466  	inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
1467  }
1468  
1469  /**
1470   * inode_fsgid_set - initialize inode's i_gid field with callers fsgid
1471   * @inode: inode to initialize
1472   * @idmap: idmap of the mount the inode was found from
1473   *
1474   * Initialize the i_gid field of @inode. If the inode was found/created via
1475   * an idmapped mount map the caller's fsgid according to @idmap.
1476   */
inode_fsgid_set(struct inode * inode,struct mnt_idmap * idmap)1477  static inline void inode_fsgid_set(struct inode *inode,
1478  				   struct mnt_idmap *idmap)
1479  {
1480  	inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
1481  }
1482  
1483  /**
1484   * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped
1485   * @sb: the superblock we want a mapping in
1486   * @idmap: idmap of the relevant mount
1487   *
1488   * Check whether the caller's fsuid and fsgid have a valid mapping in the
1489   * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map
1490   * the caller's fsuid and fsgid according to the @idmap first.
1491   *
1492   * Return: true if fsuid and fsgid is mapped, false if not.
1493   */
fsuidgid_has_mapping(struct super_block * sb,struct mnt_idmap * idmap)1494  static inline bool fsuidgid_has_mapping(struct super_block *sb,
1495  					struct mnt_idmap *idmap)
1496  {
1497  	struct user_namespace *fs_userns = sb->s_user_ns;
1498  	kuid_t kuid;
1499  	kgid_t kgid;
1500  
1501  	kuid = mapped_fsuid(idmap, fs_userns);
1502  	if (!uid_valid(kuid))
1503  		return false;
1504  	kgid = mapped_fsgid(idmap, fs_userns);
1505  	if (!gid_valid(kgid))
1506  		return false;
1507  	return kuid_has_mapping(fs_userns, kuid) &&
1508  	       kgid_has_mapping(fs_userns, kgid);
1509  }
1510  
/*
 * Out-of-line time helpers; only declared here.
 * NOTE(review): inode_set_ctime_current() presumably stores the current
 * time as @inode's ctime and returns it - confirm in the implementation.
 */
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
1513  
1514  /**
1515   * inode_get_ctime - fetch the current ctime from the inode
1516   * @inode: inode from which to fetch ctime
1517   *
1518   * Grab the current ctime from the inode and return it.
1519   */
inode_get_ctime(const struct inode * inode)1520  static inline struct timespec64 inode_get_ctime(const struct inode *inode)
1521  {
1522  	return inode->__i_ctime;
1523  }
1524  
1525  /**
1526   * inode_set_ctime_to_ts - set the ctime in the inode
1527   * @inode: inode in which to set the ctime
1528   * @ts: value to set in the ctime field
1529   *
1530   * Set the ctime in @inode to @ts
1531   */
inode_set_ctime_to_ts(struct inode * inode,struct timespec64 ts)1532  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
1533  						      struct timespec64 ts)
1534  {
1535  	inode->__i_ctime = ts;
1536  	return ts;
1537  }
1538  
1539  /**
1540   * inode_set_ctime - set the ctime in the inode
1541   * @inode: inode in which to set the ctime
1542   * @sec: tv_sec value to set
1543   * @nsec: tv_nsec value to set
1544   *
1545   * Set the ctime in @inode to { @sec, @nsec }
1546   */
inode_set_ctime(struct inode * inode,time64_t sec,long nsec)1547  static inline struct timespec64 inode_set_ctime(struct inode *inode,
1548  						time64_t sec, long nsec)
1549  {
1550  	struct timespec64 ts = { .tv_sec  = sec,
1551  				 .tv_nsec = nsec };
1552  
1553  	return inode_set_ctime_to_ts(inode, ts);
1554  }
1555  
1556  /*
1557   * Snapshotting support.
1558   */
1559  
1560  /*
1561   * These are internal functions, please use sb_start_{write,pagefault,intwrite}
1562   * instead.
1563   */
__sb_end_write(struct super_block * sb,int level)1564  static inline void __sb_end_write(struct super_block *sb, int level)
1565  {
1566  	percpu_up_read(sb->s_writers.rw_sem + level-1);
1567  }
1568  
__sb_start_write(struct super_block * sb,int level)1569  static inline void __sb_start_write(struct super_block *sb, int level)
1570  {
1571  	percpu_down_read(sb->s_writers.rw_sem + level - 1);
1572  }
1573  
__sb_start_write_trylock(struct super_block * sb,int level)1574  static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
1575  {
1576  	return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
1577  }
1578  
/*
 * Forward to percpu_rwsem_acquire()/percpu_rwsem_release() for the freeze
 * semaphore of level @lev (1-based, hence the "- 1"), passing 1 and
 * _THIS_IP_ as the remaining arguments.
 */
#define __sb_writers_acquired(sb, lev)	\
	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev)	\
	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
1583  
sb_write_started(const struct super_block * sb)1584  static inline bool sb_write_started(const struct super_block *sb)
1585  {
1586  	return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
1587  }
1588  
1589  /**
1590   * sb_end_write - drop write access to a superblock
1591   * @sb: the super we wrote to
1592   *
1593   * Decrement number of writers to the filesystem. Wake up possible waiters
1594   * wanting to freeze the filesystem.
1595   */
sb_end_write(struct super_block * sb)1596  static inline void sb_end_write(struct super_block *sb)
1597  {
1598  	__sb_end_write(sb, SB_FREEZE_WRITE);
1599  }
1600  
1601  /**
1602   * sb_end_pagefault - drop write access to a superblock from a page fault
1603   * @sb: the super we wrote to
1604   *
1605   * Decrement number of processes handling write page fault to the filesystem.
1606   * Wake up possible waiters wanting to freeze the filesystem.
1607   */
sb_end_pagefault(struct super_block * sb)1608  static inline void sb_end_pagefault(struct super_block *sb)
1609  {
1610  	__sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1611  }
1612  
1613  /**
1614   * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1615   * @sb: the super we wrote to
1616   *
1617   * Decrement fs-internal number of writers to the filesystem.  Wake up possible
1618   * waiters wanting to freeze the filesystem.
1619   */
sb_end_intwrite(struct super_block * sb)1620  static inline void sb_end_intwrite(struct super_block *sb)
1621  {
1622  	__sb_end_write(sb, SB_FREEZE_FS);
1623  }
1624  
1625  /**
1626   * sb_start_write - get write access to a superblock
1627   * @sb: the super we write to
1628   *
1629   * When a process wants to write data or metadata to a file system (i.e. dirty
1630   * a page or an inode), it should embed the operation in a sb_start_write() -
1631   * sb_end_write() pair to get exclusion against file system freezing. This
1632   * function increments number of writers preventing freezing. If the file
1633   * system is already frozen, the function waits until the file system is
1634   * thawed.
1635   *
1636   * Since freeze protection behaves as a lock, users have to preserve
1637   * ordering of freeze protection and other filesystem locks. Generally,
1638   * freeze protection should be the outermost lock. In particular, we have:
1639   *
1640   * sb_start_write
1641   *   -> i_mutex			(write path, truncate, directory ops, ...)
1642   *   -> s_umount		(freeze_super, thaw_super)
1643   */
sb_start_write(struct super_block * sb)1644  static inline void sb_start_write(struct super_block *sb)
1645  {
1646  	__sb_start_write(sb, SB_FREEZE_WRITE);
1647  }
1648  
sb_start_write_trylock(struct super_block * sb)1649  static inline bool sb_start_write_trylock(struct super_block *sb)
1650  {
1651  	return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
1652  }
1653  
1654  /**
1655   * sb_start_pagefault - get write access to a superblock from a page fault
1656   * @sb: the super we write to
1657   *
1658   * When a process starts handling write page fault, it should embed the
1659   * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1660   * exclusion against file system freezing. This is needed since the page fault
1661   * is going to dirty a page. This function increments number of running page
1662   * faults preventing freezing. If the file system is already frozen, the
1663   * function waits until the file system is thawed.
1664   *
1665   * Since page fault freeze protection behaves as a lock, users have to preserve
1666   * ordering of freeze protection and other filesystem locks. It is advised to
1667   * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
1668   * handling code implies lock dependency:
1669   *
1670   * mmap_lock
1671   *   -> sb_start_pagefault
1672   */
sb_start_pagefault(struct super_block * sb)1673  static inline void sb_start_pagefault(struct super_block *sb)
1674  {
1675  	__sb_start_write(sb, SB_FREEZE_PAGEFAULT);
1676  }
1677  
1678  /**
1679   * sb_start_intwrite - get write access to a superblock for internal fs purposes
1680   * @sb: the super we write to
1681   *
1682   * This is the third level of protection against filesystem freezing. It is
1683   * free for use by a filesystem. The only requirement is that it must rank
1684   * below sb_start_pagefault.
1685   *
1686   * For example filesystem can call sb_start_intwrite() when starting a
1687   * transaction which somewhat eases handling of freezing for internal sources
1688   * of filesystem changes (internal fs threads, discarding preallocation on file
1689   * close, etc.).
1690   */
sb_start_intwrite(struct super_block * sb)1691  static inline void sb_start_intwrite(struct super_block *sb)
1692  {
1693  	__sb_start_write(sb, SB_FREEZE_FS);
1694  }
1695  
sb_start_intwrite_trylock(struct super_block * sb)1696  static inline bool sb_start_intwrite_trylock(struct super_block *sb)
1697  {
1698  	return __sb_start_write_trylock(sb, SB_FREEZE_FS);
1699  }
1700  
/*
 * Declared here, defined elsewhere.  Takes a mount idmap and the inode to
 * examine and returns a bool.
 * NOTE(review): by its name this reports whether the caller owns @inode or
 * holds a capability that substitutes for ownership - confirm against the
 * definition.
 */
bool inode_owner_or_capable(struct mnt_idmap *idmap,
			    const struct inode *inode);
1703  
1704  /*
1705   * VFS helper functions..
1706   */
1707  int vfs_create(struct mnt_idmap *, struct inode *,
1708  	       struct dentry *, umode_t, bool);
1709  int vfs_mkdir(struct mnt_idmap *, struct inode *,
1710  	      struct dentry *, umode_t);
1711  int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
1712                umode_t, dev_t);
1713  int vfs_symlink(struct mnt_idmap *, struct inode *,
1714  		struct dentry *, const char *);
1715  int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
1716  	     struct dentry *, struct inode **);
1717  int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *);
1718  int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
1719  	       struct inode **);
1720  
/**
 * struct renamedata - contains all information required for renaming
 * @old_mnt_idmap:	idmap of the old mount the inode was found from
 * @old_dir:		parent of source
 * @old_dentry:		source
 * @new_mnt_idmap:	idmap of the new mount the inode was found from
 * @new_dir:		parent of destination
 * @new_dentry:		destination
 * @delegated_inode:	returns an inode needing a delegation break
 * @flags:		rename flags
 *
 * The structure is marked __randomize_layout, so code must not depend on the
 * declaration order of its members.
 */
struct renamedata {
	struct mnt_idmap *old_mnt_idmap;
	struct inode *old_dir;
	struct dentry *old_dentry;
	struct mnt_idmap *new_mnt_idmap;
	struct inode *new_dir;
	struct dentry *new_dentry;
	struct inode **delegated_inode;
	unsigned int flags;
} __randomize_layout;

/*
 * Takes a struct renamedata describing source, destination and flags;
 * returns an int.  Defined outside this header.
 */
int vfs_rename(struct renamedata *);
1744  
vfs_whiteout(struct mnt_idmap * idmap,struct inode * dir,struct dentry * dentry)1745  static inline int vfs_whiteout(struct mnt_idmap *idmap,
1746  			       struct inode *dir, struct dentry *dentry)
1747  {
1748  	return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
1749  			 WHITEOUT_DEV);
1750  }
1751  
/*
 * In-kernel file opening helpers.  Both return a struct file pointer and
 * take a const struct cred * as their final argument.
 */
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
				 const struct path *parentpath,
				 umode_t mode, int open_flag,
				 const struct cred *cred);
struct file *kernel_file_open(const struct path *path, int flags,
			      struct inode *inode, const struct cred *cred);

/*
 * vfs_mkobj() receives a callback @f together with an opaque void pointer.
 * NOTE(review): presumably that pointer is handed back to @f as its last
 * argument - confirm against the definition.
 */
int vfs_mkobj(struct dentry *, umode_t,
		int (*f)(struct dentry *, umode_t, void *),
		void *);

/* Ownership, mode and timestamp helpers operating on a file or a path. */
int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);

/* Same (file, cmd, arg) argument list as the ->unlocked_ioctl file operation. */
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1768  
/*
 * compat_ptr_ioctl is a real function only when CONFIG_COMPAT is set.
 * Without it the name expands to NULL, so it can still be written wherever a
 * function pointer of this signature is expected without an #ifdef at the
 * point of use.
 */
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
					unsigned long arg);
#else
#define compat_ptr_ioctl NULL
#endif
1775  
/*
 * VFS file helper functions.
 *
 * inode_init_owner() and mode_strip_sgid() both take the mount idmap, a
 * directory inode @dir and a mode; the latter returns a umode_t.
 * may_open_dev() answers a yes/no question about a path.  All three are
 * defined outside this header.
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode);
extern bool may_open_dev(const struct path *path);
umode_t mode_strip_sgid(struct mnt_idmap *idmap,
			const struct inode *dir, umode_t mode);
1784  
/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 * Return 'true' to keep going and 'false' if there are no more entries.
 *
 * NOTE(review): the unnamed parameters after the context are presumably the
 * entry name, its length, the directory offset, the inode number and the
 * entry type - confirm against an implementation before relying on this.
 */
struct dir_context;
typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
			 unsigned);

/* Iteration state handed to directory iterators (see ->iterate_shared). */
struct dir_context {
	filldir_t actor;	/* callback of the filldir_t type declared above */
	loff_t pos;		/* NOTE(review): presumably the current directory position */
};
1800  
/*
 * These flags let !MMU mmap() govern direct device mapping vs immediate
 * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
 *
 * NOMMU_MAP_COPY:	Copy can be mapped (MAP_PRIVATE)
 * NOMMU_MAP_DIRECT:	Can be mapped directly (MAP_SHARED)
 * NOMMU_MAP_READ:	Can be mapped for reading
 * NOMMU_MAP_WRITE:	Can be mapped for writing
 * NOMMU_MAP_EXEC:	Can be mapped for execution
 *
 * The READ/WRITE/EXEC capabilities are defined as the VM_MAY* bits themselves
 * rather than as private values.
 */
#define NOMMU_MAP_COPY		0x00000001
#define NOMMU_MAP_DIRECT	0x00000008
#define NOMMU_MAP_READ		VM_MAYREAD
#define NOMMU_MAP_WRITE		VM_MAYWRITE
#define NOMMU_MAP_EXEC		VM_MAYEXEC

/* Mask of the three capabilities that alias VM_MAY* bits. */
#define NOMMU_VMFLAGS \
	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
1819  
/*
 * These flags control the behavior of the remap_file_range function pointer.
 * If it is called with len == 0 that means "remap to end of source file".
 * See Documentation/filesystems/vfs.rst for more details about this call.
 *
 * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
 * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
 */
#define REMAP_FILE_DEDUP		(1 << 0)
#define REMAP_FILE_CAN_SHORTEN		(1 << 1)

/*
 * These flags signal that the caller is ok with altering various aspects of
 * the behavior of the remap operation.  The changes must be made by the
 * implementation; the vfs remap helper functions can take advantage of them.
 * Flags in this category exist to preserve the quirky behavior of the hoisted
 * btrfs clone/dedupe ioctls.
 *
 * At present the advisory set consists of REMAP_FILE_CAN_SHORTEN only.
 */
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)

/*
 * These flags control the behavior of vfs_copy_file_range().
 * They are not available to the user via syscall.
 *
 * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
 */
#define COPY_FILE_SPLICE		(1 << 0)	/* bit 0 */
1847  
/* Forward declarations for types only used through pointers below. */
struct iov_iter;
struct io_uring_cmd;
struct offset_ctx;

/*
 * struct file_operations - table of callbacks attached to an open file.
 *
 * Apart from @owner (a struct module pointer) and @mmap_supported_flags (a
 * plain flags word) every member is a function pointer.  The call_read_iter(),
 * call_write_iter() and call_mmap() helpers in this header invoke
 * ->read_iter, ->write_iter and ->mmap through file->f_op.
 *
 * ->mmap_capabilities only exists when CONFIG_MMU is not set.
 *
 * The structure is marked __randomize_layout, so code must not depend on the
 * declaration order of its members.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
			unsigned int flags);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	void (*splice_eof)(struct file *file);
	int (*setlease)(struct file *, int, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
			loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
				unsigned int poll_flags);
} __randomize_layout;
1896  
/*
 * Wrap a directory iterator that needs exclusive inode access.
 *
 * WRAP_DIR_ITER(x) defines a static function named shared_x with the
 * (struct file *, struct dir_context *) signature; its body simply forwards
 * to wrap_directory_iterator(file, ctx, x).
 */
int wrap_directory_iterator(struct file *, struct dir_context *,
			    int (*) (struct file *, struct dir_context *));
#define WRAP_DIR_ITER(x) \
	static int shared_##x(struct file *file , struct dir_context *ctx) \
	{ return wrap_directory_iterator(file, ctx, x); }
1903  
/*
 * struct inode_operations - table of callbacks attached to an inode.
 *
 * Every member is a function pointer.  The methods that take a
 * struct mnt_idmap * receive it as their first argument.  Note that
 * ->get_inode_acl works on an inode while ->get_acl/->set_acl work on an
 * (idmap, dentry) pair.
 *
 * The structure is declared ____cacheline_aligned.
 */
struct inode_operations {
	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
	int (*permission) (struct mnt_idmap *, struct inode *, int);
	struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);

	int (*readlink) (struct dentry *, char __user *,int);

	int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,
		       umode_t, bool);
	int (*link) (struct dentry *,struct inode *,struct dentry *);
	int (*unlink) (struct inode *,struct dentry *);
	int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
			const char *);
	int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t);
	int (*rmdir) (struct inode *,struct dentry *);
	int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t,dev_t);
	int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
			struct inode *, struct dentry *, unsigned int);
	int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
	int (*getattr) (struct mnt_idmap *, const struct path *,
			struct kstat *, u32, unsigned int);
	ssize_t (*listxattr) (struct dentry *, char *, size_t);
	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
		      u64 len);
	int (*update_time)(struct inode *, int);
	int (*atomic_open)(struct inode *, struct dentry *,
			   struct file *, unsigned open_flag,
			   umode_t create_mode);
	int (*tmpfile) (struct mnt_idmap *, struct inode *,
			struct file *, umode_t);
	struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *,
				     int);
	int (*set_acl)(struct mnt_idmap *, struct dentry *,
		       struct posix_acl *, int);
	int (*fileattr_set)(struct mnt_idmap *idmap,
			    struct dentry *dentry, struct fileattr *fa);
	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
1946  
call_read_iter(struct file * file,struct kiocb * kio,struct iov_iter * iter)1947  static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
1948  				     struct iov_iter *iter)
1949  {
1950  	return file->f_op->read_iter(kio, iter);
1951  }
1952  
call_write_iter(struct file * file,struct kiocb * kio,struct iov_iter * iter)1953  static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
1954  				      struct iov_iter *iter)
1955  {
1956  	return file->f_op->write_iter(kio, iter);
1957  }
1958  
call_mmap(struct file * file,struct vm_area_struct * vma)1959  static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
1960  {
1961  	return file->f_op->mmap(file, vma);
1962  }
1963  
/*
 * Read/write, copy, clone and dedupe entry points.  All are declared here and
 * defined elsewhere.
 *
 * __generic_remap_file_range_prep() differs from
 * generic_remap_file_range_prep() by an extra const struct iomap_ops *
 * argument (@dax_read_ops); both take the length by pointer.
 */
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
				   loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				    struct file *file_out, loff_t pos_out,
				    loff_t *len, unsigned int remap_flags,
				    const struct iomap_ops *dax_read_ops);
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  loff_t *count, unsigned int remap_flags);
extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  loff_t len, unsigned int remap_flags);
extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
extern int vfs_dedupe_file_range(struct file *file,
				 struct file_dedupe_range *same);
extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
					struct file *dst_file, loff_t dst_pos,
					loff_t len, unsigned int remap_flags);
1989  
/*
 * Identifies on whose behalf a freeze or thaw is requested; this is the type
 * of the @who argument of freeze_super() and thaw_super().  The two values
 * occupy separate bits.
 */
enum freeze_holder {
	FREEZE_HOLDER_KERNEL	= 0x1U,	/* bit 0 */
	FREEZE_HOLDER_USERSPACE	= 0x2U,	/* bit 1 */
};
1994  
/*
 * struct super_operations - table of callbacks attached to a superblock.
 *
 * Every member is a function pointer.  ->freeze_super and ->thaw_super take
 * an enum freeze_holder, while ->freeze_fs and ->unfreeze_fs take the
 * superblock only.  The quota methods exist only when CONFIG_QUOTA is set.
 */
struct super_operations {
   	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);
	void (*free_inode)(struct inode *);

   	void (*dirty_inode) (struct inode *, int flags);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	int (*freeze_super) (struct super_block *, enum freeze_holder who);
	int (*freeze_fs) (struct super_block *);
	int (*thaw_super) (struct super_block *, enum freeze_holder who);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	/* seq_file based reporting hooks */
	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
	struct dquot **(*get_dquots)(struct inode *);
#endif
	/* both take a struct shrink_control; NOTE(review): presumably shrinker hooks */
	long (*nr_cached_objects)(struct super_block *,
				  struct shrink_control *);
	long (*free_cached_objects)(struct super_block *,
				    struct shrink_control *);
	void (*shutdown)(struct super_block *sb);
};
2029  
/*
 * Inode flags - they have no relation to superblock flags now
 *
 * These are the bits tested in (inode)->i_flags by the IS_*() macros further
 * down.  S_DAX is special: when CONFIG_FS_DAX is not set it is defined as 0,
 * so any "i_flags & S_DAX" test is a compile-time constant zero.
 */
#define S_SYNC		(1 << 0)  /* Writes are synced at once */
#define S_NOATIME	(1 << 1)  /* Do not update access times */
#define S_APPEND	(1 << 2)  /* Append-only file */
#define S_IMMUTABLE	(1 << 3)  /* Immutable file */
#define S_DEAD		(1 << 4)  /* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)  /* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)  /* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)  /* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)  /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)  /* Inode is fs-internal */
#define S_IMA		(1 << 10) /* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11) /* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12) /* no suid or xattr security attributes */
#ifdef CONFIG_FS_DAX
#define S_DAX		(1 << 13) /* Direct Access, avoiding the page cache */
#else
#define S_DAX		0	  /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD	(1 << 15) /* Casefolded file */
#define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
2055  
2056  /*
2057   * Note that nosuid etc flags are inode-specific: setting some file-system
2058   * flags just means all the inodes inherit those flags by default. It might be
2059   * possible to override it selectively if you really wanted to with some
2060   * ioctl() that is not currently implemented.
2061   *
2062   * Exception: SB_RDONLY is always applied to the entire file system.
2063   *
 * Unfortunately, it is possible to change a filesystem's flags with it mounted
2065   * with files in use.  This means that all of the inodes will not have their
2066   * i_flags updated.  Hence, i_flags no longer inherit the superblock mount
2067   * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
2068   */
/* Nonzero when any bit of @flg is set in the s_flags of @inode's superblock. */
#define __IS_FLG(inode, flg)	((inode)->i_sb->s_flags & (flg))
2070  
sb_rdonly(const struct super_block * sb)2071  static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; }
/*
 * Predicates that consult the superblock (via sb_rdonly()/__IS_FLG()), in two
 * cases combined with the inode's own i_flags.
 */
#define IS_RDONLY(inode)	sb_rdonly((inode)->i_sb)
/* Synchronous if either the superblock or the inode itself says so. */
#define IS_SYNC(inode)		(__IS_FLG(inode, SB_SYNCHRONOUS) || \
					((inode)->i_flags & S_SYNC))
/* As IS_SYNC(), but the directory-sync bits count as well. */
#define IS_DIRSYNC(inode)	(__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \
					((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, SB_MANDLOCK)
/* Note: also nonzero for a read-only superblock, not only for SB_NOATIME. */
#define IS_NOATIME(inode)	__IS_FLG(inode, SB_RDONLY|SB_NOATIME)
#define IS_I_VERSION(inode)	__IS_FLG(inode, SB_I_VERSION)

/* Per-inode i_flags tests (IS_POSIXACL is the exception: superblock flag). */
#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
#define IS_POSIXACL(inode)	__IS_FLG(inode, SB_POSIXACL)

/* Each of these yields the raw masked bit of i_flags, not a normalised 0/1. */
#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode)		((inode)->i_flags & S_DAX)
#define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
#define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
#define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
2097  
/*
 * True when @inode is a character device (per its i_mode) whose i_rdev equals
 * WHITEOUT_DEV.  @inode is parenthesised at every use so that an expression
 * argument such as "p + 1" expands correctly; it is still evaluated twice,
 * so it must not have side effects.
 */
#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
				 (inode)->i_rdev == WHITEOUT_DEV)
2100  
/*
 * True when, seen through @idmap, either the owner or the group of @inode
 * does not translate to a valid vfsuid/vfsgid.
 */
static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
				   struct inode *inode)
{
	/* Owner first: an invalid vfsuid settles it without looking at the gid. */
	if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)))
		return true;

	return !vfsgid_valid(i_gid_into_vfsgid(idmap, inode));
}
2107  
init_sync_kiocb(struct kiocb * kiocb,struct file * filp)2108  static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
2109  {
2110  	*kiocb = (struct kiocb) {
2111  		.ki_filp = filp,
2112  		.ki_flags = filp->f_iocb_flags,
2113  		.ki_ioprio = get_current_ioprio(),
2114  	};
2115  }
2116  
kiocb_clone(struct kiocb * kiocb,struct kiocb * kiocb_src,struct file * filp)2117  static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
2118  			       struct file *filp)
2119  {
2120  	*kiocb = (struct kiocb) {
2121  		.ki_filp = filp,
2122  		.ki_flags = kiocb_src->ki_flags,
2123  		.ki_ioprio = kiocb_src->ki_ioprio,
2124  		.ki_pos = kiocb_src->ki_pos,
2125  	};
2126  }
2127  
2128  /*
2129   * Inode state bits.  Protected by inode->i_lock
2130   *
2131   * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
2132   * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
2133   *
2134   * Four bits define the lifetime of an inode.  Initially, inodes are I_NEW,
2135   * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
2136   * various stages of removing an inode.
2137   *
2138   * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
2139   *
2140   * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
2141   *			fdatasync() (unless I_DIRTY_DATASYNC is also set).
2142   *			Timestamp updates are the usual cause.
2143   * I_DIRTY_DATASYNC	Data-related inode changes pending.  We keep track of
2144   *			these changes separately from I_DIRTY_SYNC so that we
2145   *			don't have to write inode on fdatasync() when only
2146   *			e.g. the timestamps have changed.
2147   * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
2148   * I_DIRTY_TIME		The inode itself has dirty timestamps, and the
2149   *			lazytime mount option is enabled.  We keep track of this
2150   *			separately from I_DIRTY_SYNC in order to implement
2151   *			lazytime.  This gets cleared if I_DIRTY_INODE
2152   *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
2153   *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
2154   *			in place because writeback might already be in progress
2155   *			and we don't want to lose the time update
2156   * I_NEW		Serves as both a mutex and completion notification.
2157   *			New inodes set I_NEW.  If two processes both create
2158   *			the same inode, one of them will release its inode and
2159   *			wait for I_NEW to be released before returning.
2160   *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
2161   *			also cause waiting on I_NEW, without I_NEW actually
2162   *			being set.  find_inode() uses this to prevent returning
2163   *			nearly-dead inodes.
2164   * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
2165   *			is zero.  I_FREEING must be set when I_WILL_FREE is
2166   *			cleared.
2167   * I_FREEING		Set when inode is about to be freed but still has dirty
2168   *			pages or buffers attached or the inode itself is still
2169   *			dirty.
2170   * I_CLEAR		Added by clear_inode().  In this state the inode is
2171   *			clean and can be destroyed.  Inode keeps I_FREEING.
2172   *
2173   *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
2174   *			prohibited for many purposes.  iget() must wait for
2175   *			the inode to be completely released, then create it
2176   *			anew.  Other functions will just ignore such inodes,
2177   *			if appropriate.  I_NEW is used for waiting.
2178   *
2179   * I_SYNC		Writeback of inode is running. The bit is set during
2180   *			data writeback, and cleared with a wakeup on the bit
2181   *			address once it is done. The bit is also used to pin
2182   *			the inode in memory for flusher thread.
2183   *
 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
2185   *
2186   * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
2187   *
2188   * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
2189   *			synchronize competing switching instances and to tell
2190   *			wb stat updates to grab the i_pages lock.  See
2191   *			inode_switch_wbs_work_fn() for details.
2192   *
2193   * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
2194   *			and work dirs among overlayfs mounts.
2195   *
2196   * I_CREATING		New object's inode in the middle of setting up.
2197   *
2198   * I_DONTCACHE		Evict inode as soon as it is not used anymore.
2199   *
2200   * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
2201   *			Used to detect that mark_inode_dirty() should not move
2202   * 			inode between dirty lists.
2203   *
2204   * I_PINNING_FSCACHE_WB	Inode is pinning an fscache object for writeback.
2205   *
2206   * Q: What is the difference between I_WILL_FREE and I_FREEING?
2207   */
/*
 * Bit assignments for the inode state flags.  __I_NEW, __I_SYNC and
 * __I_DIO_WAKEUP name bit *numbers*; the matching I_* macro is the mask built
 * from that number.  Bit 12 is not assigned in this list.
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_DIO_WAKEUP		9
#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
#define I_WB_SWITCH		(1 << 13)
#define I_OVL_INUSE		(1 << 14)
#define I_CREATING		(1 << 15)
#define I_DONTCACHE		(1 << 16)
#define I_SYNC_QUEUED		(1 << 17)
#define I_PINNING_FSCACHE_WB	(1 << 18)

/* Composite masks: inode-only dirt, inode + pages, and everything incl. time. */
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
2233  
2234  extern void __mark_inode_dirty(struct inode *, int);
mark_inode_dirty(struct inode * inode)2235  static inline void mark_inode_dirty(struct inode *inode)
2236  {
2237  	__mark_inode_dirty(inode, I_DIRTY);
2238  }
2239  
mark_inode_dirty_sync(struct inode * inode)2240  static inline void mark_inode_dirty_sync(struct inode *inode)
2241  {
2242  	__mark_inode_dirty(inode, I_DIRTY_SYNC);
2243  }
2244  
2245  /*
2246   * Returns true if the given inode itself only has dirty timestamps (its pages
2247   * may still be dirty) and isn't currently being allocated or freed.
 * Filesystems should call this if, when writing an inode with lazytime
 * enabled, they want to opportunistically write the timestamps of other inodes
2250   * located very nearby on-disk, e.g. in the same inode block.  This returns true
2251   * if the given inode is in need of such an opportunistic update.  Requires
2252   * i_lock, or at least later re-checking under i_lock.
2253   */
inode_is_dirtytime_only(struct inode * inode)2254  static inline bool inode_is_dirtytime_only(struct inode *inode)
2255  {
2256  	return (inode->i_state & (I_DIRTY_TIME | I_NEW |
2257  				  I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
2258  }
2259  
/*
 * Link-count primitives, defined outside this header.  inc_nlink() and
 * drop_nlink() are paired with mark_inode_dirty() by the
 * inode_inc_link_count()/inode_dec_link_count() helpers in this header.
 * NOTE(review): by name these increment, decrement, zero and set the inode's
 * link count - confirm against the definitions.
 */
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
extern void set_nlink(struct inode *inode, unsigned int nlink);
2264  
inode_inc_link_count(struct inode * inode)2265  static inline void inode_inc_link_count(struct inode *inode)
2266  {
2267  	inc_nlink(inode);
2268  	mark_inode_dirty(inode);
2269  }
2270  
inode_dec_link_count(struct inode * inode)2271  static inline void inode_dec_link_count(struct inode *inode)
2272  {
2273  	drop_nlink(inode);
2274  	mark_inode_dirty(inode);
2275  }
2276  
/*
 * One bit per timestamp/version field, so values can be OR-ed together.
 * NOTE(review): presumably what the int @flags of inode_update_time() and
 * friends carries - confirm against the callers.
 */
enum file_time_flags {
	S_ATIME		= 1 << 0,
	S_MTIME		= 1 << 1,
	S_CTIME		= 1 << 2,
	S_VERSION	= 1 << 3,
};
2283  
/*
 * Access-time helpers, defined outside this header.  touch_atime() is what
 * file_accessed() calls for files opened without O_NOATIME.
 */
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
int inode_update_time(struct inode *inode, int flags);
2287  
file_accessed(struct file * file)2288  static inline void file_accessed(struct file *file)
2289  {
2290  	if (!(file->f_flags & O_NOATIME))
2291  		touch_atime(&file->f_path);
2292  }
2293  
/*
 * Declared here, defined elsewhere; all three return an int.
 * file_modified() and kiocb_modified() take a struct file and a struct kiocb
 * respectively.
 */
extern int file_modified(struct file *file);
int kiocb_modified(struct kiocb *iocb);

int sync_inode_metadata(struct inode *inode, int wait);
2298  
/*
 * struct file_system_type - describes one filesystem type.
 *
 * The FS_* constants defined right after @fs_flags are the bits stored in
 * that member.  @next is a pointer to another file_system_type and
 * @fs_supers is an hlist head.  The trailing lock_class_key members provide
 * per-filesystem-type lock classes; s_writers_key has one entry per freeze
 * level (SB_FREEZE_LEVELS).
 */
struct file_system_type {
	const char *name;
	int fs_flags;
#define FS_REQUIRES_DEV		1
#define FS_BINARY_MOUNTDATA	2
#define FS_HAS_SUBTYPE		4
#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
#define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
#define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
	int (*init_fs_context)(struct fs_context *);
	const struct fs_parameter_spec *parameters;
	struct dentry *(*mount) (struct file_system_type *, int,
		       const char *, void *);
	void (*kill_sb) (struct super_block *);
	struct module *owner;
	struct file_system_type * next;
	struct hlist_head fs_supers;

	/* superblock-level lock classes */
	struct lock_class_key s_lock_key;
	struct lock_class_key s_umount_key;
	struct lock_class_key s_vfs_rename_key;
	struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

	/* inode-level lock classes */
	struct lock_class_key i_lock_key;
	struct lock_class_key i_mutex_key;
	struct lock_class_key invalidate_lock_key;
	struct lock_class_key i_mutex_dir_key;
};
2328  
/* Expands to MODULE_ALIAS("fs-" NAME); NAME must be a string literal. */
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
2330  
/*
 * Superblock creation and teardown helpers, all defined outside this header.
 *
 * mount_bdev(), mount_single() and mount_nodev() share the same fill_super
 * callback type: int (*)(struct super_block *, void *, int).
 * sget_fc() and sget() take a @test and a @set callback; the former passes
 * them a struct fs_context, the latter an opaque void pointer.
 */
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void retire_super(struct super_block *sb);
void generic_shutdown_super(struct super_block *sb);
void kill_block_super(struct super_block *sb);
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);
int set_anon_super(struct super_block *s, void *data);
int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
struct super_block *sget_fc(struct fs_context *fc,
			    int (*test)(struct super_block *, struct fs_context *),
			    int (*set)(struct super_block *, struct fs_context *));
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags, void *data);
struct super_block *sget_dev(struct fs_context *fc, dev_t dev);
2360  
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */

/*
 * fops_get - take a reference on the module owning @fops.
 *
 * Evaluates to @fops when it is non-NULL and try_module_get() on its ->owner
 * succeeds, and to NULL otherwise.  @fops is evaluated exactly once, so an
 * argument with side effects is safe.
 */
#define fops_get(fops) ({						\
	const struct file_operations *_fops = (fops);			\
	(((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL));	\
})

/*
 * fops_put - module_put() the ->owner of @fops; a NULL @fops is ignored.
 * @fops is evaluated exactly once.
 */
#define fops_put(fops) ({						\
	const struct file_operations *_fops = (fops);			\
	if (_fops)							\
		module_put((_fops)->owner);				\
})
/*
 * This one is to be used *ONLY* from ->open() instances.
 * fops must be non-NULL, pinned down *and* module dependencies
 * should be sufficient to pin the caller down as well.
 *
 * Steps, in order: @f is evaluated once into a local; fops_put() is applied
 * to the file's current f_op; then @fops is stored into f_op.  The store
 * happens inside the BUG_ON() condition, which fires when the stored pointer
 * is NULL - so this macro relies on BUG_ON() always evaluating its argument.
 */
#define replace_fops(f, fops) \
	do {	\
		struct file *__file = (f); \
		fops_put(__file->f_op); \
		BUG_ON(!(__file->f_op = (fops))); \
	} while(0)
2377  
2378  extern int register_filesystem(struct file_system_type *);
2379  extern int unregister_filesystem(struct file_system_type *);
2380  extern int vfs_statfs(const struct path *, struct kstatfs *);
2381  extern int user_statfs(const char __user *, struct kstatfs *);
2382  extern int fd_statfs(int, struct kstatfs *);
2383  int freeze_super(struct super_block *super, enum freeze_holder who);
2384  int thaw_super(struct super_block *super, enum freeze_holder who);
2385  extern __printf(2, 3)
2386  int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
2387  extern int super_setup_bdi(struct super_block *sb);
2388  
2389  extern int current_umask(void);
2390  
2391  extern void ihold(struct inode * inode);
2392  extern void iput(struct inode *);
2393  int inode_update_timestamps(struct inode *inode, int flags);
2394  int generic_update_time(struct inode *, int);
2395  
2396  /* /sys/fs */
2397  extern struct kobject *fs_kobj;
2398  
/* INT_MAX with the bits outside PAGE_MASK cleared. */
#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
2400  
2401  /* fs/open.c */
2402  struct audit_names;
/*
 * struct filename - a name string together with its userland origin,
 * a reference count and an audit_names pointer.
 */
struct filename {
	const char		*name;	/* pointer to actual string */
	const __user char	*uptr;	/* original userland pointer */
	atomic_t		refcnt;	/* reference count */
	struct audit_names	*aname;	/* opaque here: only forward-declared */
	const char		iname[];	/* flexible array; presumably inline name storage -- confirm */
};
/* Build-time check: iname must sit at an offset that is a multiple of sizeof(long). */
static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
2411  
file_mnt_idmap(struct file * file)2412  static inline struct mnt_idmap *file_mnt_idmap(struct file *file)
2413  {
2414  	return mnt_idmap(file->f_path.mnt);
2415  }
2416  
2417  /**
2418   * is_idmapped_mnt - check whether a mount is mapped
2419   * @mnt: the mount to check
2420   *
2421   * If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped.
2422   *
2423   * Return: true if mount is mapped, false if not.
2424   */
is_idmapped_mnt(const struct vfsmount * mnt)2425  static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
2426  {
2427  	return mnt_idmap(mnt) != &nop_mnt_idmap;
2428  }
2429  
2430  extern long vfs_truncate(const struct path *, loff_t);
2431  int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
2432  		unsigned int time_attrs, struct file *filp);
2433  extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
2434  			loff_t len);
2435  extern long do_sys_open(int dfd, const char __user *filename, int flags,
2436  			umode_t mode);
2437  extern struct file *file_open_name(struct filename *, int, umode_t);
2438  extern struct file *filp_open(const char *, int, umode_t);
2439  extern struct file *file_open_root(const struct path *,
2440  				   const char *, int, umode_t);
/*
 * Convenience wrapper around file_open_root(): the root path is built
 * from @mnt and its mnt_root dentry.
 */
static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
				   const char *name, int flags, umode_t mode)
{
	const struct path root = {
		.mnt	= mnt,
		.dentry	= mnt->mnt_root,
	};

	return file_open_root(&root, name, flags, mode);
}
2447  struct file *dentry_open(const struct path *path, int flags,
2448  			 const struct cred *creds);
2449  struct file *dentry_create(const struct path *path, int flags, umode_t mode,
2450  			   const struct cred *cred);
2451  struct file *backing_file_open(const struct path *path, int flags,
2452  			       const struct path *real_path,
2453  			       const struct cred *cred);
2454  struct path *backing_file_real_path(struct file *f);
2455  
2456  /*
2457   * file_real_path - get the path corresponding to f_inode
2458   *
2459   * When opening a backing file for a stackable filesystem (e.g.,
2460   * overlayfs) f_path may be on the stackable filesystem and f_inode on
2461   * the underlying filesystem.  When the path associated with f_inode is
2462   * needed, this helper should be used instead of accessing f_path
2463   * directly.
2464  */
file_real_path(struct file * f)2465  static inline const struct path *file_real_path(struct file *f)
2466  {
2467  	if (unlikely(f->f_mode & FMODE_BACKING))
2468  		return backing_file_real_path(f);
2469  	return &f->f_path;
2470  }
2471  
file_clone_open(struct file * file)2472  static inline struct file *file_clone_open(struct file *file)
2473  {
2474  	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
2475  }
2476  extern int filp_close(struct file *, fl_owner_t id);
2477  
2478  extern struct filename *getname_flags(const char __user *, int, int *);
2479  extern struct filename *getname_uflags(const char __user *, int);
2480  extern struct filename *getname(const char __user *);
2481  extern struct filename *getname_kernel(const char *);
2482  extern void putname(struct filename *name);
2483  
2484  extern int finish_open(struct file *file, struct dentry *dentry,
2485  			int (*open)(struct inode *, struct file *));
2486  extern int finish_no_open(struct file *file, struct dentry *dentry);
2487  
2488  /* Helper for the simple case when original dentry is used */
finish_open_simple(struct file * file,int error)2489  static inline int finish_open_simple(struct file *file, int error)
2490  {
2491  	if (error)
2492  		return error;
2493  
2494  	return finish_open(file, file->f_path.dentry, NULL);
2495  }
2496  
2497  /* fs/dcache.c */
2498  extern void __init vfs_caches_init_early(void);
2499  extern void __init vfs_caches_init(void);
2500  
2501  extern struct kmem_cache *names_cachep;
2502  
/*
 * __getname() allocates an object from names_cachep with GFP_KERNEL;
 * __putname() returns @name to that same cache.
 */
#define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
2505  
2506  extern struct super_block *blockdev_superblock;
sb_is_blkdev_sb(struct super_block * sb)2507  static inline bool sb_is_blkdev_sb(struct super_block *sb)
2508  {
2509  	return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
2510  }
2511  
2512  void emergency_thaw_all(void);
2513  extern int sync_filesystem(struct super_block *);
2514  extern const struct file_operations def_blk_fops;
2515  extern const struct file_operations def_chr_fops;
2516  
/* fs/char_dev.c */
/* NOTE(review): presumably the exclusive upper bound for char majors -- confirm */
#define CHRDEV_MAJOR_MAX 512
/* Marks the bottom of the first segment of free char majors */
#define CHRDEV_MAJOR_DYN_END 234
/*
 * Marks the top and bottom of the second segment of free char majors;
 * note that START (511) is numerically above END (384).
 */
#define CHRDEV_MAJOR_DYN_EXT_START 511
#define CHRDEV_MAJOR_DYN_EXT_END 384
2524  
2525  extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
2526  extern int register_chrdev_region(dev_t, unsigned, const char *);
2527  extern int __register_chrdev(unsigned int major, unsigned int baseminor,
2528  			     unsigned int count, const char *name,
2529  			     const struct file_operations *fops);
2530  extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
2531  				unsigned int count, const char *name);
2532  extern void unregister_chrdev_region(dev_t, unsigned);
2533  extern void chrdev_show(struct seq_file *,off_t);
2534  
/*
 * Shorthand for __register_chrdev() with base minor 0 and a count of 256.
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	const unsigned int baseminor = 0;
	const unsigned int count = 256;

	return __register_chrdev(major, baseminor, count, name, fops);
}
2540  
/*
 * Shorthand for __unregister_chrdev() with base minor 0 and a count of 256.
 */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	const unsigned int baseminor = 0;
	const unsigned int count = 256;

	__unregister_chrdev(major, baseminor, count, name);
}
2545  
2546  extern void init_special_inode(struct inode *, umode_t, dev_t);
2547  
2548  /* Invalid inode operations -- fs/bad_inode.c */
2549  extern void make_bad_inode(struct inode *);
2550  extern bool is_bad_inode(struct inode *);
2551  
2552  extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
2553  						loff_t lend);
2554  extern int __must_check file_check_and_advance_wb_err(struct file *file);
2555  extern int __must_check file_write_and_wait_range(struct file *file,
2556  						loff_t start, loff_t end);
2557  
file_write_and_wait(struct file * file)2558  static inline int file_write_and_wait(struct file *file)
2559  {
2560  	return file_write_and_wait_range(file, 0, LLONG_MAX);
2561  }
2562  
2563  extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2564  			   int datasync);
2565  extern int vfs_fsync(struct file *file, int datasync);
2566  
2567  extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
2568  				unsigned int flags);
2569  
iocb_is_dsync(const struct kiocb * iocb)2570  static inline bool iocb_is_dsync(const struct kiocb *iocb)
2571  {
2572  	return (iocb->ki_flags & IOCB_DSYNC) ||
2573  		IS_SYNC(iocb->ki_filp->f_mapping->host);
2574  }
2575  
2576  /*
2577   * Sync the bytes written if this was a synchronous write.  Expect ki_pos
2578   * to already be updated for the write, and will return either the amount
2579   * of bytes passed in, or an error if syncing the file failed.
2580   */
generic_write_sync(struct kiocb * iocb,ssize_t count)2581  static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
2582  {
2583  	if (iocb_is_dsync(iocb)) {
2584  		int ret = vfs_fsync_range(iocb->ki_filp,
2585  				iocb->ki_pos - count, iocb->ki_pos - 1,
2586  				(iocb->ki_flags & IOCB_SYNC) ? 0 : 1);
2587  		if (ret)
2588  			return ret;
2589  	}
2590  
2591  	return count;
2592  }
2593  
2594  extern void emergency_sync(void);
2595  extern void emergency_remount(void);
2596  
2597  #ifdef CONFIG_BLOCK
2598  extern int bmap(struct inode *inode, sector_t *block);
2599  #else
/* Stub used when CONFIG_BLOCK is not defined: always fails with -EINVAL. */
static inline int bmap(struct inode *inode, sector_t *block)
{
	return -EINVAL;
}
2604  #endif
2605  
2606  int notify_change(struct mnt_idmap *, struct dentry *,
2607  		  struct iattr *, struct inode **);
2608  int inode_permission(struct mnt_idmap *, struct inode *, int);
2609  int generic_permission(struct mnt_idmap *, struct inode *, int);
/* inode_permission() on @file's inode, using the idmap of @file's mount. */
static inline int file_permission(struct file *file, int mask)
{
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct inode *inode = file_inode(file);

	return inode_permission(idmap, inode, mask);
}
path_permission(const struct path * path,int mask)2615  static inline int path_permission(const struct path *path, int mask)
2616  {
2617  	return inode_permission(mnt_idmap(path->mnt),
2618  				d_inode(path->dentry), mask);
2619  }
2620  int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
2621  		   struct inode *inode);
2622  
execute_ok(struct inode * inode)2623  static inline bool execute_ok(struct inode *inode)
2624  {
2625  	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2626  }
2627  
inode_wrong_type(const struct inode * inode,umode_t mode)2628  static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
2629  {
2630  	return (inode->i_mode ^ mode) & S_IFMT;
2631  }
2632  
2633  /**
2634   * file_start_write - get write access to a superblock for regular file io
2635   * @file: the file we want to write to
2636   *
2637   * This is a variant of sb_start_write() which is a noop on non-regualr file.
2638   * Should be matched with a call to file_end_write().
2639   */
file_start_write(struct file * file)2640  static inline void file_start_write(struct file *file)
2641  {
2642  	if (!S_ISREG(file_inode(file)->i_mode))
2643  		return;
2644  	sb_start_write(file_inode(file)->i_sb);
2645  }
2646  
file_start_write_trylock(struct file * file)2647  static inline bool file_start_write_trylock(struct file *file)
2648  {
2649  	if (!S_ISREG(file_inode(file)->i_mode))
2650  		return true;
2651  	return sb_start_write_trylock(file_inode(file)->i_sb);
2652  }
2653  
2654  /**
2655   * file_end_write - drop write access to a superblock of a regular file
2656   * @file: the file we wrote to
2657   *
2658   * Should be matched with a call to file_start_write().
2659   */
file_end_write(struct file * file)2660  static inline void file_end_write(struct file *file)
2661  {
2662  	if (!S_ISREG(file_inode(file)->i_mode))
2663  		return;
2664  	sb_end_write(file_inode(file)->i_sb);
2665  }
2666  
2667  /**
2668   * kiocb_start_write - get write access to a superblock for async file io
2669   * @iocb: the io context we want to submit the write with
2670   *
2671   * This is a variant of sb_start_write() for async io submission.
2672   * Should be matched with a call to kiocb_end_write().
2673   */
kiocb_start_write(struct kiocb * iocb)2674  static inline void kiocb_start_write(struct kiocb *iocb)
2675  {
2676  	struct inode *inode = file_inode(iocb->ki_filp);
2677  
2678  	sb_start_write(inode->i_sb);
2679  	/*
2680  	 * Fool lockdep by telling it the lock got released so that it
2681  	 * doesn't complain about the held lock when we return to userspace.
2682  	 */
2683  	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
2684  }
2685  
2686  /**
2687   * kiocb_end_write - drop write access to a superblock after async file io
2688   * @iocb: the io context we sumbitted the write with
2689   *
2690   * Should be matched with a call to kiocb_start_write().
2691   */
kiocb_end_write(struct kiocb * iocb)2692  static inline void kiocb_end_write(struct kiocb *iocb)
2693  {
2694  	struct inode *inode = file_inode(iocb->ki_filp);
2695  
2696  	/*
2697  	 * Tell lockdep we inherited freeze protection from submission thread.
2698  	 */
2699  	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
2700  	sb_end_write(inode->i_sb);
2701  }
2702  
2703  /*
2704   * This is used for regular files where some users -- especially the
2705   * currently executed binary in a process, previously handled via
2706   * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
2707   * read-write shared) accesses.
2708   *
2709   * get_write_access() gets write permission for a file.
2710   * put_write_access() releases this write permission.
2711   * deny_write_access() denies write access to a file.
2712   * allow_write_access() re-enables write access to a file.
2713   *
2714   * The i_writecount field of an inode can have the following values:
2715   * 0: no write access, no denied write access
2716   * < 0: (-i_writecount) users that denied write access to the file.
2717   * > 0: (i_writecount) users that have write access to the file.
2718   *
2719   * Normally we operate on that counter with atomic_{inc,dec} and it's safe
2720   * except for the cases where we don't hold i_writecount yet. Then we need to
2721   * use {get,deny}_write_access() - these functions check the sign and refuse
2722   * to do the change if sign is wrong.
2723   */
get_write_access(struct inode * inode)2724  static inline int get_write_access(struct inode *inode)
2725  {
2726  	return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
2727  }
deny_write_access(struct file * file)2728  static inline int deny_write_access(struct file *file)
2729  {
2730  	struct inode *inode = file_inode(file);
2731  	return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
2732  }
put_write_access(struct inode * inode)2733  static inline void put_write_access(struct inode * inode)
2734  {
2735  	atomic_dec(&inode->i_writecount);
2736  }
allow_write_access(struct file * file)2737  static inline void allow_write_access(struct file *file)
2738  {
2739  	if (file)
2740  		atomic_inc(&file_inode(file)->i_writecount);
2741  }
inode_is_open_for_write(const struct inode * inode)2742  static inline bool inode_is_open_for_write(const struct inode *inode)
2743  {
2744  	return atomic_read(&inode->i_writecount) > 0;
2745  }
2746  
2747  #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
i_readcount_dec(struct inode * inode)2748  static inline void i_readcount_dec(struct inode *inode)
2749  {
2750  	BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
2751  }
i_readcount_inc(struct inode * inode)2752  static inline void i_readcount_inc(struct inode *inode)
2753  {
2754  	atomic_inc(&inode->i_readcount);
2755  }
2756  #else
/* No-op: neither CONFIG_IMA nor CONFIG_FILE_LOCKING is defined. */
static inline void i_readcount_dec(struct inode *inode)
{
}
/* No-op: neither CONFIG_IMA nor CONFIG_FILE_LOCKING is defined. */
static inline void i_readcount_inc(struct inode *inode)
{
}
2765  #endif
2766  extern int do_pipe_flags(int *, int);
2767  
2768  extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
2769  ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
2770  extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
2771  extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
2772  extern struct file * open_exec(const char *);
2773  
2774  /* fs/dcache.c -- generic fs support functions */
2775  extern bool is_subdir(struct dentry *, struct dentry *);
2776  extern bool path_is_under(const struct path *, const struct path *);
2777  
2778  extern char *file_path(struct file *, char *, int);
2779  
2780  #include <linux/err.h>
2781  
2782  /* needed for stackable file system support */
2783  extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
2784  
2785  extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
2786  
2787  extern int inode_init_always(struct super_block *, struct inode *);
2788  extern void inode_init_once(struct inode *);
2789  extern void address_space_init_once(struct address_space *mapping);
2790  extern struct inode * igrab(struct inode *);
2791  extern ino_t iunique(struct super_block *, ino_t);
2792  extern int inode_needs_sync(struct inode *inode);
2793  extern int generic_delete_inode(struct inode *inode);
generic_drop_inode(struct inode * inode)2794  static inline int generic_drop_inode(struct inode *inode)
2795  {
2796  	return !inode->i_nlink || inode_unhashed(inode);
2797  }
2798  extern void d_mark_dontcache(struct inode *inode);
2799  
2800  extern struct inode *ilookup5_nowait(struct super_block *sb,
2801  		unsigned long hashval, int (*test)(struct inode *, void *),
2802  		void *data);
2803  extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
2804  		int (*test)(struct inode *, void *), void *data);
2805  extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
2806  
2807  extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
2808  		int (*test)(struct inode *, void *),
2809  		int (*set)(struct inode *, void *),
2810  		void *data);
2811  extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
2812  extern struct inode * iget_locked(struct super_block *, unsigned long);
2813  extern struct inode *find_inode_nowait(struct super_block *,
2814  				       unsigned long,
2815  				       int (*match)(struct inode *,
2816  						    unsigned long, void *),
2817  				       void *data);
2818  extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
2819  				    int (*)(struct inode *, void *), void *);
2820  extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
2821  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2822  extern int insert_inode_locked(struct inode *);
2823  #ifdef CONFIG_DEBUG_LOCK_ALLOC
2824  extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
2825  #else
/* No-op when CONFIG_DEBUG_LOCK_ALLOC is not defined. */
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }
2827  #endif
2828  extern void unlock_new_inode(struct inode *);
2829  extern void discard_new_inode(struct inode *);
2830  extern unsigned int get_next_ino(void);
2831  extern void evict_inodes(struct super_block *sb);
2832  void dump_mapping(const struct address_space *);
2833  
2834  /*
2835   * Userspace may rely on the inode number being non-zero. For example, glibc
2836   * simply ignores files with zero i_ino in unlink() and other places.
2837   *
2838   * As an additional complication, if userspace was compiled with
2839   * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
2840   * lower 32 bits, so we need to check that those aren't zero explicitly. With
2841   * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
2842   * better safe than sorry.
2843   */
is_zero_ino(ino_t ino)2844  static inline bool is_zero_ino(ino_t ino)
2845  {
2846  	return (u32)ino == 0;
2847  }
2848  
2849  extern void __iget(struct inode * inode);
2850  extern void iget_failed(struct inode *);
2851  extern void clear_inode(struct inode *);
2852  extern void __destroy_inode(struct inode *);
2853  extern struct inode *new_inode_pseudo(struct super_block *sb);
2854  extern struct inode *new_inode(struct super_block *sb);
2855  extern void free_inode_nonrcu(struct inode *inode);
2856  extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
2857  extern int file_remove_privs(struct file *);
2858  int setattr_should_drop_sgid(struct mnt_idmap *idmap,
2859  			     const struct inode *inode);
2860  
2861  /*
2862   * This must be used for allocating filesystems specific inodes to set
2863   * up the inode reclaim context correctly.
2864   */
2865  static inline void *
alloc_inode_sb(struct super_block * sb,struct kmem_cache * cache,gfp_t gfp)2866  alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp)
2867  {
2868  	return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
2869  }
2870  
2871  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
insert_inode_hash(struct inode * inode)2872  static inline void insert_inode_hash(struct inode *inode)
2873  {
2874  	__insert_inode_hash(inode, inode->i_ino);
2875  }
2876  
2877  extern void __remove_inode_hash(struct inode *);
remove_inode_hash(struct inode * inode)2878  static inline void remove_inode_hash(struct inode *inode)
2879  {
2880  	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
2881  		__remove_inode_hash(inode);
2882  }
2883  
2884  extern void inode_sb_list_add(struct inode *inode);
2885  extern void inode_add_lru(struct inode *inode);
2886  
2887  extern int sb_set_blocksize(struct super_block *, int);
2888  extern int sb_min_blocksize(struct super_block *, int);
2889  
2890  extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2891  extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2892  extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
2893  int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
2894  extern int generic_write_check_limits(struct file *file, loff_t pos,
2895  		loff_t *count);
2896  extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
2897  ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
2898  		ssize_t already_read);
2899  extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
2900  extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
2901  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
2902  extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
2903  ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
2904  ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
2905  		ssize_t direct_written, ssize_t buffered_written);
2906  
2907  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
2908  		rwf_t flags);
2909  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
2910  		rwf_t flags);
2911  ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
2912  			   struct iov_iter *iter);
2913  ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
2914  			    struct iov_iter *iter);
2915  
2916  /* fs/splice.c */
2917  ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
2918  			    struct pipe_inode_info *pipe,
2919  			    size_t len, unsigned int flags);
2920  ssize_t copy_splice_read(struct file *in, loff_t *ppos,
2921  			 struct pipe_inode_info *pipe,
2922  			 size_t len, unsigned int flags);
2923  extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
2924  		struct file *, loff_t *, size_t, unsigned int);
2925  extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
2926  		loff_t *opos, size_t len, unsigned int flags);
2927  
2928  
2929  extern void
2930  file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
2931  extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
2932  #define no_llseek NULL
2933  extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
2934  extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
2935  extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
2936  		int whence, loff_t maxsize, loff_t eof);
2937  extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
2938  		int whence, loff_t size);
2939  extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
2940  extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
2941  int rw_verify_area(int, struct file *, const loff_t *, size_t);
2942  extern int generic_file_open(struct inode * inode, struct file * filp);
2943  extern int nonseekable_open(struct inode * inode, struct file * filp);
2944  extern int stream_open(struct inode * inode, struct file * filp);
2945  
2946  #ifdef CONFIG_BLOCK
2947  typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
2948  			    loff_t file_offset);
2949  
/*
 * Flag bits for the @flags argument of __blockdev_direct_IO();
 * blockdev_direct_IO() passes DIO_LOCKING | DIO_SKIP_HOLES.
 */
enum {
	/* need locking between buffered and direct access */
	DIO_LOCKING	= 0x01,

	/* filesystem does not support filling holes */
	DIO_SKIP_HOLES	= 0x02,
};
2957  
2958  ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
2959  			     struct block_device *bdev, struct iov_iter *iter,
2960  			     get_block_t get_block,
2961  			     dio_iodone_t end_io,
2962  			     int flags);
2963  
blockdev_direct_IO(struct kiocb * iocb,struct inode * inode,struct iov_iter * iter,get_block_t get_block)2964  static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
2965  					 struct inode *inode,
2966  					 struct iov_iter *iter,
2967  					 get_block_t get_block)
2968  {
2969  	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
2970  			get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
2971  }
2972  #endif
2973  
2974  void inode_dio_wait(struct inode *inode);
2975  
/**
 * inode_dio_begin - signal start of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * Call this when starting a direct I/O request.  It raises
 * @inode->i_dio_count; the matching inode_dio_end() drops the count again
 * and wakes up waiters once it reaches zero.
 */
static inline void inode_dio_begin(struct inode *inode)
{
	atomic_inc(&inode->i_dio_count);
}
2987  
2988  /**
2989   * inode_dio_end - signal finish of a direct I/O requests
2990   * @inode: inode the direct I/O happens on
2991   *
2992   * This is called once we've finished processing a direct I/O request,
2993   * and is used to wake up callers waiting for direct I/O to be quiesced.
2994   */
inode_dio_end(struct inode * inode)2995  static inline void inode_dio_end(struct inode *inode)
2996  {
2997  	if (atomic_dec_and_test(&inode->i_dio_count))
2998  		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
2999  }
3000  
3001  extern void inode_set_flags(struct inode *inode, unsigned int flags,
3002  			    unsigned int mask);
3003  
3004  extern const struct file_operations generic_ro_fops;
3005  
/*
 * True for character devices, block devices, FIFOs and sockets.
 * Note: @m is expanded once per test, so avoid side effects in it.
 */
#define special_file(m) \
	(S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m))
3007  
3008  extern int readlink_copy(char __user *, int, const char *);
3009  extern int page_readlink(struct dentry *, char __user *, int);
3010  extern const char *page_get_link(struct dentry *, struct inode *,
3011  				 struct delayed_call *);
3012  extern void page_put_link(void *);
3013  extern int page_symlink(struct inode *inode, const char *symname, int len);
3014  extern const struct inode_operations page_symlink_inode_operations;
3015  extern void kfree_link(void *);
3016  void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
3017  void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
3018  extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
3019  extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
3020  void __inode_add_bytes(struct inode *inode, loff_t bytes);
3021  void inode_add_bytes(struct inode *inode, loff_t bytes);
3022  void __inode_sub_bytes(struct inode *inode, loff_t bytes);
3023  void inode_sub_bytes(struct inode *inode, loff_t bytes);
__inode_get_bytes(struct inode * inode)3024  static inline loff_t __inode_get_bytes(struct inode *inode)
3025  {
3026  	return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
3027  }
3028  loff_t inode_get_bytes(struct inode *inode);
3029  void inode_set_bytes(struct inode *inode, loff_t bytes);
3030  const char *simple_get_link(struct dentry *, struct inode *,
3031  			    struct delayed_call *);
3032  extern const struct inode_operations simple_symlink_inode_operations;
3033  
3034  extern int iterate_dir(struct file *, struct dir_context *);
3035  
3036  int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
3037  		int flags);
3038  int vfs_fstat(int fd, struct kstat *stat);
3039  
/* vfs_fstatat() relative to AT_FDCWD with no flags set. */
static inline int vfs_stat(const char __user *filename, struct kstat *stat)
{
	const int flags = 0;

	return vfs_fstatat(AT_FDCWD, filename, stat, flags);
}
/* vfs_fstatat() relative to AT_FDCWD with AT_SYMLINK_NOFOLLOW set. */
static inline int vfs_lstat(const char __user *name, struct kstat *stat)
{
	const int flags = AT_SYMLINK_NOFOLLOW;

	return vfs_fstatat(AT_FDCWD, name, stat, flags);
}
3048  
3049  extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
3050  extern int vfs_readlink(struct dentry *, char __user *, int);
3051  
3052  extern struct file_system_type *get_filesystem(struct file_system_type *fs);
3053  extern void put_filesystem(struct file_system_type *fs);
3054  extern struct file_system_type *get_fs_type(const char *name);
3055  extern struct super_block *get_active_super(struct block_device *bdev);
3056  extern void drop_super(struct super_block *sb);
3057  extern void drop_super_exclusive(struct super_block *sb);
3058  extern void iterate_supers(void (*)(struct super_block *, void *), void *);
3059  extern void iterate_supers_type(struct file_system_type *,
3060  			        void (*)(struct super_block *, void *), void *);
3061  
3062  extern int dcache_dir_open(struct inode *, struct file *);
3063  extern int dcache_dir_close(struct inode *, struct file *);
3064  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
3065  extern int dcache_readdir(struct file *, struct dir_context *);
/*
 * simple_* helpers (declarations only; the definitions are outside this
 * header).  simple_setattr(), simple_getattr() and simple_rename() take a
 * struct mnt_idmap * as their first argument; the others do not.
 * NOTE(review): the names suggest ready-made inode/file method
 * implementations for simple filesystems - confirm against the
 * definitions before relying on that.
 */
extern int simple_setattr(struct mnt_idmap *, struct dentry *,
			  struct iattr *);
extern int simple_getattr(struct mnt_idmap *, const struct path *,
			  struct kstat *, u32, unsigned int);
extern int simple_statfs(struct dentry *, struct kstatfs *);
extern int simple_open(struct inode *inode, struct file *file);
extern int simple_link(struct dentry *, struct inode *, struct dentry *);
extern int simple_unlink(struct inode *, struct dentry *);
extern int simple_rmdir(struct inode *, struct dentry *);
void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
				  struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename(struct mnt_idmap *, struct inode *,
			 struct dentry *, struct inode *, struct dentry *,
			 unsigned int);
/* Takes a dentry plus a callback that itself receives a dentry. */
extern void simple_recursive_removal(struct dentry *,
                              void (*callback)(struct dentry *));
/*
 * Further generic helpers and shared objects, declared here and defined
 * elsewhere.  ram_aops and simple_dentry_operations are const operation
 * tables; everything else in this group is a function.
 */
extern int noop_fsync(struct file *, loff_t, loff_t, int);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
extern int simple_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len,
			struct page **pagep, void **fsdata);
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
extern const struct dentry_operations simple_dentry_operations;
3095  
/*
 * Directory helpers and the const simple_dir_* operation tables
 * (declarations only).  make_empty_dir_inode() and is_empty_dir_inode()
 * both operate on a single inode; the latter returns bool.
 */
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
extern const struct file_operations simple_dir_operations;
extern const struct inode_operations simple_dir_inode_operations;
extern void make_empty_dir_inode(struct inode *inode);
extern bool is_empty_dir_inode(struct inode *inode);
/*
 * Descriptor passed (by const pointer) to simple_fill_super(): a name,
 * the file_operations to associate with it, and an int mode.
 * Member order is part of the interface - positional initializers rely
 * on it.
 */
struct tree_descr {
	const char			*name;
	const struct file_operations	*ops;
	int				mode;
};
/*
 * Declarations only.  d_alloc_name() takes a dentry and a name string and
 * returns a dentry.  simple_fill_super() takes a super_block, an unsigned
 * long and a const tree_descr pointer.  simple_pin_fs() and
 * simple_release_fs() share a (struct vfsmount **mount, int *count) pair.
 * NOTE(review): presumably @count is a pin reference count - confirm
 * against the definitions.
 */
struct dentry *d_alloc_name(struct dentry *, const char *);
extern int simple_fill_super(struct super_block *, unsigned long,
			     const struct tree_descr *);
extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
extern void simple_release_fs(struct vfsmount **mount, int *count);
3108  
/*
 * Buffer copy helpers (declarations only).  Per the __user annotations,
 * simple_read_from_buffer() has a user-space destination (@to) and a
 * kernel source (@from), while simple_write_to_buffer() has a kernel
 * destination and a user-space source.  Both take a position pointer
 * (@ppos), an @available size and a @count, and return ssize_t.
 */
extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
			loff_t *ppos, const void *from, size_t available);
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
		const void __user *from, size_t count);
3113  
/*
 * Context object taken by the simple_offset_*() helpers: an xarray plus
 * a 32-bit next_offset value.
 * NOTE(review): presumably @xa maps directory offsets to entries and
 * @next_offset is the next offset to hand out - confirm in the
 * implementation.
 */
struct offset_ctx {
	struct xarray		xa;
	u32			next_offset;
};
3118  
/*
 * simple_offset_*() API (declarations only).  init and destroy take only
 * the offset_ctx; add and remove additionally take a dentry; add returns
 * int.  simple_offset_rename_exchange() takes the same
 * (old_dir, old_dentry, new_dir, new_dentry) quadruple as
 * simple_rename_exchange().
 */
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
				  struct dentry *old_dentry,
				  struct inode *new_dir,
				  struct dentry *new_dentry);
void simple_offset_destroy(struct offset_ctx *octx);

/* const file_operations table, defined elsewhere. */
extern const struct file_operations simple_offset_dir_operations;
3129  
/*
 * Miscellaneous generic_* helpers (declarations only).  The two fsync
 * variants share the signature (file, loff_t, loff_t, int).
 */
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);

/* Takes an unsigned int and a u64; returns int. */
extern int generic_check_addressable(unsigned, u64);

/* Operates on a single dentry; returns nothing. */
extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
3136  
/*
 * Attribute-change helpers (declarations only).  may_setattr(),
 * setattr_prepare() and setattr_copy() each take a struct mnt_idmap * as
 * their first argument; inode_newsize_ok() takes a const inode and a
 * loff_t offset.
 */
int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
		unsigned int ia_valid);
int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *);
extern int inode_newsize_ok(const struct inode *, loff_t offset);
void setattr_copy(struct mnt_idmap *, struct inode *inode,
		  const struct iattr *attr);

/* Takes an open file; returns int. */
extern int file_update_time(struct file *file);
3145  
vma_is_dax(const struct vm_area_struct * vma)3146  static inline bool vma_is_dax(const struct vm_area_struct *vma)
3147  {
3148  	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
3149  }
3150  
vma_is_fsdax(struct vm_area_struct * vma)3151  static inline bool vma_is_fsdax(struct vm_area_struct *vma)
3152  {
3153  	struct inode *inode;
3154  
3155  	if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
3156  		return false;
3157  	if (!vma_is_dax(vma))
3158  		return false;
3159  	inode = file_inode(vma->vm_file);
3160  	if (S_ISCHR(inode->i_mode))
3161  		return false; /* device-dax */
3162  	return true;
3163  }
3164  
iocb_flags(struct file * file)3165  static inline int iocb_flags(struct file *file)
3166  {
3167  	int res = 0;
3168  	if (file->f_flags & O_APPEND)
3169  		res |= IOCB_APPEND;
3170  	if (file->f_flags & O_DIRECT)
3171  		res |= IOCB_DIRECT;
3172  	if (file->f_flags & O_DSYNC)
3173  		res |= IOCB_DSYNC;
3174  	if (file->f_flags & __O_SYNC)
3175  		res |= IOCB_SYNC;
3176  	return res;
3177  }
3178  
/*
 * Fold per-call RWF_* @flags into @ki->ki_flags.
 *
 * Returns 0 on success (including the trivial case of no flags) and
 * -EOPNOTSUPP when @flags contains a bit outside RWF_SUPPORTED, or when
 * RWF_NOWAIT is requested on a file whose f_mode lacks FMODE_NOWAIT.
 * @ki->ki_flags is only modified on success.
 */
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
{
	int extra;

	/* make sure there's no overlap between RWF and private IOCB flags */
	BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD);

	if (!flags)
		return 0;
	if (unlikely(flags & ~RWF_SUPPORTED))
		return -EOPNOTSUPP;
	if ((flags & RWF_NOWAIT) && !(ki->ki_filp->f_mode & FMODE_NOWAIT))
		return -EOPNOTSUPP;

	/* supported RWF bits are carried over verbatim */
	extra = (__force int) (flags & RWF_SUPPORTED);

	/* RWF_NOWAIT additionally sets IOCB_NOIO, RWF_SYNC sets IOCB_DSYNC */
	if (flags & RWF_NOWAIT)
		extra |= IOCB_NOIO;
	if (flags & RWF_SYNC)
		extra |= IOCB_DSYNC;

	ki->ki_flags |= extra;
	return 0;
}
3203  
parent_ino(struct dentry * dentry)3204  static inline ino_t parent_ino(struct dentry *dentry)
3205  {
3206  	ino_t res;
3207  
3208  	/*
3209  	 * Don't strictly need d_lock here? If the parent ino could change
3210  	 * then surely we'd have a deeper race in the caller?
3211  	 */
3212  	spin_lock(&dentry->d_lock);
3213  	res = dentry->d_parent->d_inode->i_ino;
3214  	spin_unlock(&dentry->d_lock);
3215  	return res;
3216  }
3217  
3218  /* Transaction based IO helpers */
3219  
/*
 * An argresp is stored in an allocated page and holds the
 * size of the argument or response, along with its content.
 *
 * @size: size of the argument or response
 * @data: flexible array member carrying the content itself
 */
struct simple_transaction_argresp {
	ssize_t size;
	char data[];
};

/*
 * Room left for @data when an argresp occupies one page: PAGE_SIZE minus
 * the size of the fixed part of the structure.
 */
#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
3230  
/*
 * simple_transaction_*() API (declarations only).
 * simple_transaction_get() takes a user-space buffer and its size and
 * returns a char pointer; simple_transaction_read() has the
 * (file, user buffer, size, position) signature of a read method;
 * simple_transaction_release() has the (inode, file) signature of a
 * release method.
 */
char *simple_transaction_get(struct file *file, const char __user *buf,
				size_t size);
ssize_t simple_transaction_read(struct file *file, char __user *buf,
				size_t size, loff_t *pos);
int simple_transaction_release(struct inode *inode, struct file *file);

/*
 * Takes the file and a size_t @n.
 * NOTE(review): presumably records @n as the response size - confirm.
 */
void simple_transaction_set(struct file *file, size_t n);
3238  
3239  /*
3240   * simple attribute files
3241   *
 * These attributes behave similarly to those in sysfs:
3243   *
3244   * Writing to an attribute immediately sets a value, an open file can be
3245   * written to multiple times.
3246   *
3247   * Reading from an attribute creates a buffer from the value that might get
3248   * read with multiple read calls. When the attribute has been read
3249   * completely, no further read calls are possible until the file is opened
3250   * again.
3251   *
 * All attributes contain a text representation of a numeric value
 * that is accessed with the get() and set() functions.
3254   */
/*
 * Generate a static open function named <__fops>_open and a static const
 * struct file_operations named __fops.
 *
 * @__fops:      name of the generated file_operations object
 * @__get:       getter passed to simple_attr_open()
 * @__set:       setter passed to simple_attr_open()
 * @__fmt:       format string; checked at compile time against a 0ull
 *               argument via __simple_attr_check_format(), then passed
 *               to simple_attr_open()
 * @__is_signed: when true .write is simple_attr_write_signed, otherwise
 *               simple_attr_write
 *
 * The expansion ends without a semicolon; the user supplies it.
 */
#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed)	\
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.release = simple_attr_release,					\
	.read	 = simple_attr_read,					\
	.write	 = (__is_signed) ? simple_attr_write_signed : simple_attr_write,	\
	.llseek	 = generic_file_llseek,					\
}
3269  
/*
 * Expands DEFINE_SIMPLE_ATTRIBUTE_XSIGNED with __is_signed = false, so
 * the generated .write handler is simple_attr_write().
 */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)

/*
 * Expands DEFINE_SIMPLE_ATTRIBUTE_XSIGNED with __is_signed = true, so
 * the generated .write handler is simple_attr_write_signed().
 */
#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt)	\
	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
3275  
/*
 * Compile-time format checker used by DEFINE_SIMPLE_ATTRIBUTE_XSIGNED.
 * The function body is empty; the __printf(1, 2) annotation is what lets
 * the compiler validate @fmt against the variadic arguments.
 */
static inline __printf(1, 2)
void __simple_attr_check_format(const char *fmt, ...)
{
	/* don't do anything, just let the compiler check the arguments */
}
3281  
/*
 * Backing functions referenced by DEFINE_SIMPLE_ATTRIBUTE_XSIGNED
 * (declarations only).  For simple_attr_open(), the @get callback
 * receives (void *, u64 *) and the @set callback (void *, u64); both
 * return int.  @fmt is the format string for the attribute.
 */
int simple_attr_open(struct inode *inode, struct file *file,
		     int (*get)(void *, u64 *), int (*set)(void *, u64),
		     const char *fmt);
int simple_attr_release(struct inode *inode, struct file *file);
ssize_t simple_attr_read(struct file *file, char __user *buf,
			 size_t len, loff_t *ppos);
ssize_t simple_attr_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *ppos);
ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
				 size_t len, loff_t *ppos);
3292  
/* Forward declaration only; the structure is defined elsewhere. */
struct ctl_table;
/* Marked __init.  Takes a caller-supplied buffer and its size; returns int. */
int __init list_bdev_fs_names(char *buf, size_t size);
3295  
/* FMODE_EXEC / FMODE_NONOTIFY force-cast to int. */
#define __FMODE_EXEC		((__force int) FMODE_EXEC)
#define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)

/*
 * ACC_MODE(x): index a four-entry lookup string with the access-mode bits
 * of @x (x & O_ACCMODE).  The entries are, in order, 4, 2, 6 and 6.
 * NOTE(review): these values presumably correspond to MAY_READ /
 * MAY_WRITE combinations - confirm where those are defined.
 */
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])

/*
 * OPEN_FMODE(flag): build an fmode_t from open flags.  The access-mode
 * part is ((flag) + 1) & O_ACCMODE; the __FMODE_NONOTIFY bit of @flag is
 * carried over unchanged.
 *
 * @flag is parenthesized at every use so that an expression argument
 * such as (a & b) or (a | b) is evaluated as a unit instead of being
 * torn apart by the precedence of '+' and '&' inside the expansion.
 */
#define OPEN_FMODE(flag) ((__force fmode_t)((((flag) + 1) & O_ACCMODE) | \
					    ((flag) & __FMODE_NONOTIFY)))
3302  
is_sxid(umode_t mode)3303  static inline bool is_sxid(umode_t mode)
3304  {
3305  	return mode & (S_ISUID | S_ISGID);
3306  }
3307  
check_sticky(struct mnt_idmap * idmap,struct inode * dir,struct inode * inode)3308  static inline int check_sticky(struct mnt_idmap *idmap,
3309  			       struct inode *dir, struct inode *inode)
3310  {
3311  	if (!(dir->i_mode & S_ISVTX))
3312  		return 0;
3313  
3314  	return __check_sticky(idmap, dir, inode);
3315  }
3316  
inode_has_no_xattr(struct inode * inode)3317  static inline void inode_has_no_xattr(struct inode *inode)
3318  {
3319  	if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC))
3320  		inode->i_flags |= S_NOSEC;
3321  }
3322  
is_root_inode(struct inode * inode)3323  static inline bool is_root_inode(struct inode *inode)
3324  {
3325  	return inode == inode->i_sb->s_root->d_inode;
3326  }
3327  
dir_emit(struct dir_context * ctx,const char * name,int namelen,u64 ino,unsigned type)3328  static inline bool dir_emit(struct dir_context *ctx,
3329  			    const char *name, int namelen,
3330  			    u64 ino, unsigned type)
3331  {
3332  	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
3333  }
dir_emit_dot(struct file * file,struct dir_context * ctx)3334  static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
3335  {
3336  	return ctx->actor(ctx, ".", 1, ctx->pos,
3337  			  file->f_path.dentry->d_inode->i_ino, DT_DIR);
3338  }
dir_emit_dotdot(struct file * file,struct dir_context * ctx)3339  static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
3340  {
3341  	return ctx->actor(ctx, "..", 2, ctx->pos,
3342  			  parent_ino(file->f_path.dentry), DT_DIR);
3343  }
dir_emit_dots(struct file * file,struct dir_context * ctx)3344  static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
3345  {
3346  	if (ctx->pos == 0) {
3347  		if (!dir_emit_dot(file, ctx))
3348  			return false;
3349  		ctx->pos = 1;
3350  	}
3351  	if (ctx->pos == 1) {
3352  		if (!dir_emit_dotdot(file, ctx))
3353  			return false;
3354  		ctx->pos = 2;
3355  	}
3356  	return true;
3357  }
dir_relax(struct inode * inode)3358  static inline bool dir_relax(struct inode *inode)
3359  {
3360  	inode_unlock(inode);
3361  	inode_lock(inode);
3362  	return !IS_DEADDIR(inode);
3363  }
3364  
dir_relax_shared(struct inode * inode)3365  static inline bool dir_relax_shared(struct inode *inode)
3366  {
3367  	inode_unlock_shared(inode);
3368  	inode_lock_shared(inode);
3369  	return !IS_DEADDIR(inode);
3370  }
3371  
/* Declarations only: path_noexec() returns bool for a const path. */
extern bool path_noexec(const struct path *path);
extern void inode_nohighmem(struct inode *inode);

/* mm/fadvise.c */
/*
 * Both fadvise entry points take (file, offset, len, advice) and
 * return int.
 */
extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
		       int advice);
extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
			   int advice);
3380  
3381  #endif /* _LINUX_FS_H */
3382