diff options
Diffstat (limited to 'extras/recipes-kernel/linux/linux-omap/linus/0035-update-Documentation-filesystems-Locking.patch')
-rw-r--r-- | extras/recipes-kernel/linux/linux-omap/linus/0035-update-Documentation-filesystems-Locking.patch | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/extras/recipes-kernel/linux/linux-omap/linus/0035-update-Documentation-filesystems-Locking.patch b/extras/recipes-kernel/linux/linux-omap/linus/0035-update-Documentation-filesystems-Locking.patch
new file mode 100644
index 00000000..5a5610b5
--- /dev/null
+++ b/extras/recipes-kernel/linux/linux-omap/linus/0035-update-Documentation-filesystems-Locking.patch
@@ -0,0 +1,402 @@ | |||
1 | From 84a03bcb1f1fb5b6a9f6f508fa0a1fae41a5827a Mon Sep 17 00:00:00 2001 | ||
2 | From: Christoph Hellwig <hch@lst.de> | ||
3 | Date: Thu, 16 Dec 2010 12:04:54 +0100 | ||
4 | Subject: [PATCH 35/65] update Documentation/filesystems/Locking | ||
5 | |||
6 | Mostly inspired by all the recent BKL removal changes, but a lot of older | ||
7 | updates also weren't properly recorded. | ||
8 | |||
9 | Signed-off-by: Christoph Hellwig <hch@lst.de> | ||
10 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | ||
11 | --- | ||
12 | Documentation/filesystems/Locking | 214 ++++++++++++++++++------------------- | ||
13 | 1 files changed, 102 insertions(+), 112 deletions(-) | ||
14 | |||
15 | diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking | ||
16 | index b6426f1..7686e76 100644 | ||
17 | --- a/Documentation/filesystems/Locking | ||
18 | +++ b/Documentation/filesystems/Locking | ||
19 | @@ -18,7 +18,6 @@ prototypes: | ||
20 | char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); | ||
21 | |||
22 | locking rules: | ||
23 | - none have BKL | ||
24 | dcache_lock rename_lock ->d_lock may block | ||
25 | d_revalidate: no no no yes | ||
26 | d_hash no no no yes | ||
27 | @@ -42,18 +41,23 @@ ata *); | ||
28 | int (*rename) (struct inode *, struct dentry *, | ||
29 | struct inode *, struct dentry *); | ||
30 | int (*readlink) (struct dentry *, char __user *,int); | ||
31 | - int (*follow_link) (struct dentry *, struct nameidata *); | ||
32 | + void * (*follow_link) (struct dentry *, struct nameidata *); | ||
33 | + void (*put_link) (struct dentry *, struct nameidata *, void *); | ||
34 | void (*truncate) (struct inode *); | ||
35 | int (*permission) (struct inode *, int, struct nameidata *); | ||
36 | + int (*check_acl)(struct inode *, int); | ||
37 | int (*setattr) (struct dentry *, struct iattr *); | ||
38 | int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); | ||
39 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | ||
40 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); | ||
41 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | ||
42 | int (*removexattr) (struct dentry *, const char *); | ||
43 | + void (*truncate_range)(struct inode *, loff_t, loff_t); | ||
44 | + long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); | ||
45 | + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); | ||
46 | |||
47 | locking rules: | ||
48 | - all may block, none have BKL | ||
49 | + all may block | ||
50 | i_mutex(inode) | ||
51 | lookup: yes | ||
52 | create: yes | ||
53 | @@ -66,19 +70,24 @@ rmdir: yes (both) (see below) | ||
54 | rename: yes (all) (see below) | ||
55 | readlink: no | ||
56 | follow_link: no | ||
57 | +put_link: no | ||
58 | truncate: yes (see below) | ||
59 | setattr: yes | ||
60 | permission: no | ||
61 | +check_acl: no | ||
62 | getattr: no | ||
63 | setxattr: yes | ||
64 | getxattr: no | ||
65 | listxattr: no | ||
66 | removexattr: yes | ||
67 | +truncate_range: yes | ||
68 | +fallocate: no | ||
69 | +fiemap: no | ||
70 | Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on | ||
71 | victim. | ||
72 | cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. | ||
73 | ->truncate() is never called directly - it's a callback, not a | ||
74 | -method. It's called by vmtruncate() - library function normally used by | ||
75 | +method. It's called by vmtruncate() - deprecated library function used by | ||
76 | ->setattr(). Locking information above applies to that call (i.e. is | ||
77 | inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been | ||
78 | passed). | ||
79 | @@ -91,7 +100,7 @@ prototypes: | ||
80 | struct inode *(*alloc_inode)(struct super_block *sb); | ||
81 | void (*destroy_inode)(struct inode *); | ||
82 | void (*dirty_inode) (struct inode *); | ||
83 | - int (*write_inode) (struct inode *, int); | ||
84 | + int (*write_inode) (struct inode *, struct writeback_control *wbc); | ||
85 | int (*drop_inode) (struct inode *); | ||
86 | void (*evict_inode) (struct inode *); | ||
87 | void (*put_super) (struct super_block *); | ||
88 | @@ -105,10 +114,11 @@ prototypes: | ||
89 | int (*show_options)(struct seq_file *, struct vfsmount *); | ||
90 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | ||
91 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | ||
92 | + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | ||
93 | + int (*trim_fs) (struct super_block *, struct fstrim_range *); | ||
94 | |||
95 | locking rules: | ||
96 | All may block [not true, see below] | ||
97 | - None have BKL | ||
98 | s_umount | ||
99 | alloc_inode: | ||
100 | destroy_inode: | ||
101 | @@ -127,6 +137,8 @@ umount_begin: no | ||
102 | show_options: no (namespace_sem) | ||
103 | quota_read: no (see below) | ||
104 | quota_write: no (see below) | ||
105 | +bdev_try_to_free_page: no (see below) | ||
106 | +trim_fs: no | ||
107 | |||
108 | ->statfs() has s_umount (shared) when called by ustat(2) (native or | ||
109 | compat), but that's an accident of bad API; s_umount is used to pin | ||
110 | @@ -139,19 +151,25 @@ be the only ones operating on the quota file by the quota code (via | ||
111 | dqio_sem) (unless an admin really wants to screw up something and | ||
112 | writes to quota files with quotas on). For other details about locking | ||
113 | see also dquot_operations section. | ||
114 | +->bdev_try_to_free_page is called from the ->releasepage handler of | ||
115 | +the block device inode. See there for more details. | ||
116 | |||
117 | --------------------------- file_system_type --------------------------- | ||
118 | prototypes: | ||
119 | int (*get_sb) (struct file_system_type *, int, | ||
120 | const char *, void *, struct vfsmount *); | ||
121 | + struct dentry *(*mount) (struct file_system_type *, int, | ||
122 | + const char *, void *); | ||
123 | void (*kill_sb) (struct super_block *); | ||
124 | locking rules: | ||
125 | - may block BKL | ||
126 | -get_sb yes no | ||
127 | -kill_sb yes no | ||
128 | + may block | ||
129 | +get_sb yes | ||
130 | +mount yes | ||
131 | +kill_sb yes | ||
132 | |||
133 | ->get_sb() returns error or 0 with locked superblock attached to the vfsmount | ||
134 | (exclusive on ->s_umount). | ||
135 | +->mount() returns ERR_PTR or the root dentry. | ||
136 | ->kill_sb() takes a write-locked superblock, does all shutdown work on it, | ||
137 | unlocks and drops the reference. | ||
138 | |||
139 | @@ -176,27 +194,35 @@ prototypes: | ||
140 | void (*freepage)(struct page *); | ||
141 | int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | ||
142 | loff_t offset, unsigned long nr_segs); | ||
143 | - int (*launder_page) (struct page *); | ||
144 | + int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, | ||
145 | + unsigned long *); | ||
146 | + int (*migratepage)(struct address_space *, struct page *, struct page *); | ||
147 | + int (*launder_page)(struct page *); | ||
148 | + int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); | ||
149 | + int (*error_remove_page)(struct address_space *, struct page *); | ||
150 | |||
151 | locking rules: | ||
152 | All except set_page_dirty and freepage may block | ||
153 | |||
154 | - BKL PageLocked(page) i_mutex | ||
155 | -writepage: no yes, unlocks (see below) | ||
156 | -readpage: no yes, unlocks | ||
157 | -sync_page: no maybe | ||
158 | -writepages: no | ||
159 | -set_page_dirty no no | ||
160 | -readpages: no | ||
161 | -write_begin: no locks the page yes | ||
162 | -write_end: no yes, unlocks yes | ||
163 | -perform_write: no n/a yes | ||
164 | -bmap: no | ||
165 | -invalidatepage: no yes | ||
166 | -releasepage: no yes | ||
167 | -freepage: no yes | ||
168 | -direct_IO: no | ||
169 | -launder_page: no yes | ||
170 | + PageLocked(page) i_mutex | ||
171 | +writepage: yes, unlocks (see below) | ||
172 | +readpage: yes, unlocks | ||
173 | +sync_page: maybe | ||
174 | +writepages: | ||
175 | +set_page_dirty no | ||
176 | +readpages: | ||
177 | +write_begin: locks the page yes | ||
178 | +write_end: yes, unlocks yes | ||
179 | +bmap: | ||
180 | +invalidatepage: yes | ||
181 | +releasepage: yes | ||
182 | +freepage: yes | ||
183 | +direct_IO: | ||
184 | +get_xip_mem: maybe | ||
185 | +migratepage: yes (both) | ||
186 | +launder_page: yes | ||
187 | +is_partially_uptodate: yes | ||
188 | +error_remove_page: yes | ||
189 | |||
190 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() | ||
191 | may be called from the request handler (/dev/loop). | ||
192 | @@ -276,9 +302,8 @@ under spinlock (it cannot block) and is sometimes called with the page | ||
193 | not locked. | ||
194 | |||
195 | ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some | ||
196 | -filesystems and by the swapper. The latter will eventually go away. All | ||
197 | -instances do not actually need the BKL. Please, keep it that way and don't | ||
198 | -breed new callers. | ||
199 | +filesystems and by the swapper. The latter will eventually go away. Please | ||
200 | +do not breed new callers. | ||
201 | |||
202 | ->invalidatepage() is called when the filesystem must attempt to drop | ||
203 | some or all of the buffers from the page when it is being truncated. It | ||
204 | @@ -299,47 +324,37 @@ cleaned, or an error value if not. Note that in order to prevent the page | ||
205 | getting mapped back in and redirtied, it needs to be kept locked | ||
206 | across the entire operation. | ||
207 | |||
208 | - Note: currently almost all instances of address_space methods are | ||
209 | -using BKL for internal serialization and that's one of the worst sources | ||
210 | -of contention. Normally they are calling library functions (in fs/buffer.c) | ||
211 | -and pass foo_get_block() as a callback (on local block-based filesystems, | ||
212 | -indeed). BKL is not needed for library stuff and is usually taken by | ||
213 | -foo_get_block(). It's an overkill, since block bitmaps can be protected by | ||
214 | -internal fs locking and real critical areas are much smaller than the areas | ||
215 | -filesystems protect now. | ||
216 | - | ||
217 | ----------------------- file_lock_operations ------------------------------ | ||
218 | prototypes: | ||
219 | - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ | ||
220 | - void (*fl_remove)(struct file_lock *); /* lock removal callback */ | ||
221 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | ||
222 | void (*fl_release_private)(struct file_lock *); | ||
223 | |||
224 | |||
225 | locking rules: | ||
226 | - BKL may block | ||
227 | -fl_insert: yes no | ||
228 | -fl_remove: yes no | ||
229 | -fl_copy_lock: yes no | ||
230 | -fl_release_private: yes yes | ||
231 | + file_lock_lock may block | ||
232 | +fl_copy_lock: yes no | ||
233 | +fl_release_private: maybe no | ||
234 | |||
235 | ----------------------- lock_manager_operations --------------------------- | ||
236 | prototypes: | ||
237 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | ||
238 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | ||
239 | + int (*fl_grant)(struct file_lock *, struct file_lock *, int); | ||
240 | void (*fl_release_private)(struct file_lock *); | ||
241 | void (*fl_break)(struct file_lock *); /* break_lease callback */ | ||
242 | + int (*fl_mylease)(struct file_lock *, struct file_lock *); | ||
243 | + int (*fl_change)(struct file_lock **, int); | ||
244 | |||
245 | locking rules: | ||
246 | - BKL may block | ||
247 | -fl_compare_owner: yes no | ||
248 | -fl_notify: yes no | ||
249 | -fl_release_private: yes yes | ||
250 | -fl_break: yes no | ||
251 | - | ||
252 | - Currently only NFSD and NLM provide instances of this class. None of the | ||
253 | -them block. If you have out-of-tree instances - please, show up. Locking | ||
254 | -in that area will change. | ||
255 | + file_lock_lock may block | ||
256 | +fl_compare_owner: yes no | ||
257 | +fl_notify: yes no | ||
258 | +fl_grant: no no | ||
259 | +fl_release_private: maybe no | ||
260 | +fl_break: yes no | ||
261 | +fl_mylease: yes no | ||
262 | +fl_change: yes no | ||
263 | + | ||
264 | --------------------------- buffer_head ----------------------------------- | ||
265 | prototypes: | ||
266 | void (*b_end_io)(struct buffer_head *bh, int uptodate); | ||
267 | @@ -364,17 +379,17 @@ prototypes: | ||
268 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | ||
269 | |||
270 | locking rules: | ||
271 | - BKL bd_mutex | ||
272 | -open: no yes | ||
273 | -release: no yes | ||
274 | -ioctl: no no | ||
275 | -compat_ioctl: no no | ||
276 | -direct_access: no no | ||
277 | -media_changed: no no | ||
278 | -unlock_native_capacity: no no | ||
279 | -revalidate_disk: no no | ||
280 | -getgeo: no no | ||
281 | -swap_slot_free_notify: no no (see below) | ||
282 | + bd_mutex | ||
283 | +open: yes | ||
284 | +release: yes | ||
285 | +ioctl: no | ||
286 | +compat_ioctl: no | ||
287 | +direct_access: no | ||
288 | +media_changed: no | ||
289 | +unlock_native_capacity: no | ||
290 | +revalidate_disk: no | ||
291 | +getgeo: no | ||
292 | +swap_slot_free_notify: no (see below) | ||
293 | |||
294 | media_changed, unlock_native_capacity and revalidate_disk are called only from | ||
295 | check_disk_change(). | ||
296 | @@ -413,34 +428,21 @@ prototypes: | ||
297 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, | ||
298 | unsigned long, unsigned long, unsigned long); | ||
299 | int (*check_flags)(int); | ||
300 | + int (*flock) (struct file *, int, struct file_lock *); | ||
301 | + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, | ||
302 | + size_t, unsigned int); | ||
303 | + ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, | ||
304 | + size_t, unsigned int); | ||
305 | + int (*setlease)(struct file *, long, struct file_lock **); | ||
306 | }; | ||
307 | |||
308 | locking rules: | ||
309 | - All may block. | ||
310 | - BKL | ||
311 | -llseek: no (see below) | ||
312 | -read: no | ||
313 | -aio_read: no | ||
314 | -write: no | ||
315 | -aio_write: no | ||
316 | -readdir: no | ||
317 | -poll: no | ||
318 | -unlocked_ioctl: no | ||
319 | -compat_ioctl: no | ||
320 | -mmap: no | ||
321 | -open: no | ||
322 | -flush: no | ||
323 | -release: no | ||
324 | -fsync: no (see below) | ||
325 | -aio_fsync: no | ||
326 | -fasync: no | ||
327 | -lock: yes | ||
328 | -readv: no | ||
329 | -writev: no | ||
330 | -sendfile: no | ||
331 | -sendpage: no | ||
332 | -get_unmapped_area: no | ||
333 | -check_flags: no | ||
334 | + All may block except for ->setlease. | ||
335 | + No VFS locks held on entry except for ->fsync and ->setlease. | ||
336 | + | ||
337 | +->fsync() has i_mutex on inode. | ||
338 | + | ||
339 | +->setlease has the file_list_lock held and must not sleep. | ||
340 | |||
341 | ->llseek() locking has moved from llseek to the individual llseek | ||
342 | implementations. If your fs is not using generic_file_llseek, you | ||
343 | @@ -450,17 +452,10 @@ mutex or just to use i_size_read() instead. | ||
344 | Note: this does not protect the file->f_pos against concurrent modifications | ||
345 | since this is something the userspace has to take care about. | ||
346 | |||
347 | -Note: ext2_release() was *the* source of contention on fs-intensive | ||
348 | -loads and dropping BKL on ->release() helps to get rid of that (we still | ||
349 | -grab BKL for cases when we close a file that had been opened r/w, but that | ||
350 | -can and should be done using the internal locking with smaller critical areas). | ||
351 | -Current worst offender is ext2_get_block()... | ||
352 | - | ||
353 | -->fasync() is called without BKL protection, and is responsible for | ||
354 | -maintaining the FASYNC bit in filp->f_flags. Most instances call | ||
355 | -fasync_helper(), which does that maintenance, so it's not normally | ||
356 | -something one needs to worry about. Return values > 0 will be mapped to | ||
357 | -zero in the VFS layer. | ||
358 | +->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. | ||
359 | +Most instances call fasync_helper(), which does that maintenance, so it's | ||
360 | +not normally something one needs to worry about. Return values > 0 will be | ||
361 | +mapped to zero in the VFS layer. | ||
362 | |||
363 | ->readdir() and ->ioctl() on directories must be changed. Ideally we would | ||
364 | move ->readdir() to inode_operations and use a separate method for directory | ||
365 | @@ -471,8 +466,6 @@ components. And there are other reasons why the current interface is a mess... | ||
366 | ->read on directories probably must go away - we should just enforce -EISDIR | ||
367 | in sys_read() and friends. | ||
368 | |||
369 | -->fsync() has i_mutex on inode. | ||
370 | - | ||
371 | --------------------------- dquot_operations ------------------------------- | ||
372 | prototypes: | ||
373 | int (*write_dquot) (struct dquot *); | ||
374 | @@ -507,12 +500,12 @@ prototypes: | ||
375 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | ||
376 | |||
377 | locking rules: | ||
378 | - BKL mmap_sem PageLocked(page) | ||
379 | -open: no yes | ||
380 | -close: no yes | ||
381 | -fault: no yes can return with page locked | ||
382 | -page_mkwrite: no yes can return with page locked | ||
383 | -access: no yes | ||
384 | + mmap_sem PageLocked(page) | ||
385 | +open: yes | ||
386 | +close: yes | ||
387 | +fault: yes can return with page locked | ||
388 | +page_mkwrite: yes can return with page locked | ||
389 | +access: yes | ||
390 | |||
391 | ->fault() is called when a previously not present pte is about | ||
392 | to be faulted in. The filesystem must find and return the page associated | ||
393 | @@ -539,6 +532,3 @@ VM_IO | VM_PFNMAP VMAs. | ||
394 | |||
395 | (if you break something or notice that it is broken and do not fix it yourself | ||
396 | - at least put it here) | ||
397 | - | ||
398 | -ipc/shm.c::shm_delete() - may need BKL. | ||
399 | -->read() and ->write() in many drivers are (probably) missing BKL. | ||
400 | -- | ||
401 | 1.6.6.1 | ||
402 | |||