summaryrefslogtreecommitdiffstats
path: root/extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch
diff options
context:
space:
mode:
Diffstat (limited to 'extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch')
-rw-r--r--extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch11494
1 files changed, 11494 insertions, 0 deletions
diff --git a/extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch b/extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch
new file mode 100644
index 00000000..c17608b0
--- /dev/null
+++ b/extras/recipes-kernel/linux/linux-omap-2.6.39/sakoman/0029-unionfs-Add-support-for-unionfs-2.5.9.patch
@@ -0,0 +1,11494 @@
1From 0de368979f94e7c51940979c6149d34aec08f13f Mon Sep 17 00:00:00 2001
2From: Steve Sakoman <steve@sakoman.com>
3Date: Mon, 2 May 2011 16:14:34 -0700
4Subject: [PATCH 29/32] unionfs: Add support for unionfs 2.5.9
5
6---
7 Documentation/filesystems/00-INDEX | 2 +
8 Documentation/filesystems/unionfs/00-INDEX | 10 +
9 Documentation/filesystems/unionfs/concepts.txt | 287 ++++++
10 Documentation/filesystems/unionfs/issues.txt | 28 +
11 Documentation/filesystems/unionfs/rename.txt | 31 +
12 Documentation/filesystems/unionfs/usage.txt | 134 +++
13 MAINTAINERS | 8 +
14 fs/Kconfig | 1 +
15 fs/Makefile | 1 +
16 fs/namei.c | 38 +
17 fs/splice.c | 22 +-
18 fs/stack.c | 14 +-
19 fs/unionfs/Kconfig | 24 +
20 fs/unionfs/Makefile | 17 +
21 fs/unionfs/commonfops.c | 898 +++++++++++++++++++
22 fs/unionfs/copyup.c | 896 +++++++++++++++++++
23 fs/unionfs/debug.c | 548 ++++++++++++
24 fs/unionfs/dentry.c | 406 +++++++++
25 fs/unionfs/dirfops.c | 302 +++++++
26 fs/unionfs/dirhelper.c | 158 ++++
27 fs/unionfs/fanout.h | 407 +++++++++
28 fs/unionfs/file.c | 382 ++++++++
29 fs/unionfs/inode.c | 1099 ++++++++++++++++++++++++
30 fs/unionfs/lookup.c | 569 ++++++++++++
31 fs/unionfs/main.c | 763 ++++++++++++++++
32 fs/unionfs/mmap.c | 89 ++
33 fs/unionfs/rdstate.c | 285 ++++++
34 fs/unionfs/rename.c | 522 +++++++++++
35 fs/unionfs/sioq.c | 101 +++
36 fs/unionfs/sioq.h | 91 ++
37 fs/unionfs/subr.c | 95 ++
38 fs/unionfs/super.c | 1030 ++++++++++++++++++++++
39 fs/unionfs/union.h | 679 +++++++++++++++
40 fs/unionfs/unlink.c | 278 ++++++
41 fs/unionfs/whiteout.c | 601 +++++++++++++
42 fs/unionfs/xattr.c | 173 ++++
43 include/linux/fs_stack.h | 14 +-
44 include/linux/magic.h | 2 +
45 include/linux/namei.h | 3 +
46 include/linux/splice.h | 5 +
47 include/linux/union_fs.h | 22 +
48 security/security.c | 1 +
49 42 files changed, 11024 insertions(+), 12 deletions(-)
50 create mode 100644 Documentation/filesystems/unionfs/00-INDEX
51 create mode 100644 Documentation/filesystems/unionfs/concepts.txt
52 create mode 100644 Documentation/filesystems/unionfs/issues.txt
53 create mode 100644 Documentation/filesystems/unionfs/rename.txt
54 create mode 100644 Documentation/filesystems/unionfs/usage.txt
55 create mode 100644 fs/unionfs/Kconfig
56 create mode 100644 fs/unionfs/Makefile
57 create mode 100644 fs/unionfs/commonfops.c
58 create mode 100644 fs/unionfs/copyup.c
59 create mode 100644 fs/unionfs/debug.c
60 create mode 100644 fs/unionfs/dentry.c
61 create mode 100644 fs/unionfs/dirfops.c
62 create mode 100644 fs/unionfs/dirhelper.c
63 create mode 100644 fs/unionfs/fanout.h
64 create mode 100644 fs/unionfs/file.c
65 create mode 100644 fs/unionfs/inode.c
66 create mode 100644 fs/unionfs/lookup.c
67 create mode 100644 fs/unionfs/main.c
68 create mode 100644 fs/unionfs/mmap.c
69 create mode 100644 fs/unionfs/rdstate.c
70 create mode 100644 fs/unionfs/rename.c
71 create mode 100644 fs/unionfs/sioq.c
72 create mode 100644 fs/unionfs/sioq.h
73 create mode 100644 fs/unionfs/subr.c
74 create mode 100644 fs/unionfs/super.c
75 create mode 100644 fs/unionfs/union.h
76 create mode 100644 fs/unionfs/unlink.c
77 create mode 100644 fs/unionfs/whiteout.c
78 create mode 100644 fs/unionfs/xattr.c
79 create mode 100644 include/linux/union_fs.h
80
81diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
82index 8c624a1..4aa288b 100644
83--- a/Documentation/filesystems/00-INDEX
84+++ b/Documentation/filesystems/00-INDEX
85@@ -110,6 +110,8 @@ udf.txt
86 - info and mount options for the UDF filesystem.
87 ufs.txt
88 - info on the ufs filesystem.
89+unionfs/
90+ - info on the unionfs filesystem
91 vfat.txt
92 - info on using the VFAT filesystem used in Windows NT and Windows 95
93 vfs.txt
94diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
95new file mode 100644
96index 0000000..96fdf67
97--- /dev/null
98+++ b/Documentation/filesystems/unionfs/00-INDEX
99@@ -0,0 +1,10 @@
100+00-INDEX
101+ - this file.
102+concepts.txt
103+ - A brief introduction of concepts.
104+issues.txt
105+ - A summary of known issues with unionfs.
106+rename.txt
107+ - Information regarding rename operations.
108+usage.txt
109+ - Usage information and examples.
110diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
111new file mode 100644
112index 0000000..b853788
113--- /dev/null
114+++ b/Documentation/filesystems/unionfs/concepts.txt
115@@ -0,0 +1,287 @@
116+Unionfs 2.x CONCEPTS:
117+=====================
118+
119+This file describes the concepts needed by a namespace unification file
120+system.
121+
122+
123+Branch Priority:
124+================
125+
126+Each branch is assigned a unique priority - starting from 0 (highest
127+priority). No two branches can have the same priority.
128+
129+
130+Branch Mode:
131+============
132+
133+Each branch is assigned a mode - read-write or read-only. This allows
134+directories on media mounted read-write to be used in a read-only manner.
135+
136+
137+Whiteouts:
138+==========
139+
140+A whiteout removes a file name from the namespace. Whiteouts are needed when
141+one attempts to remove a file on a read-only branch.
142+
143+Suppose we have a two-branch union, where branch 0 is read-write and branch
144+1 is read-only. And a file 'foo' on branch 1:
145+
146+./b0/
147+./b1/
148+./b1/foo
149+
150+The unified view would simply be:
151+
152+./union/
153+./union/foo
154+
155+Since 'foo' is stored on a read-only branch, it cannot be removed. A
156+whiteout is used to remove the name 'foo' from the unified namespace. Again,
157+since branch 1 is read-only, the whiteout cannot be created there. So, we
158+try on a higher priority (lower numerically) branch and create the whiteout
159+there.
160+
161+./b0/
162+./b0/.wh.foo
163+./b1/
164+./b1/foo
165+
166+Later, when Unionfs traverses branches (due to lookup or readdir), it
167+eliminates 'foo' from the namespace (as well as the whiteout itself.)
168+
169+
170+Opaque Directories:
171+===================
172+
173+Assume we have a unionfs mount comprising two branches. Branch 0 is
174+empty; branch 1 has the directory /a and file /a/f. Let's say we mount a
175+union of branch 0 as read-write and branch 1 as read-only. Now, let's say
176+we try to perform the following operation in the union:
177+
178+ rm -fr a
179+
180+Because branch 1 is not writable, we cannot physically remove the file /a/f
181+or the directory /a. So instead, we will create a whiteout in branch 0
182+named /.wh.a, masking out the name "a" from branch 1. Next, let's say we
183+try to create a directory named "a" as follows:
184+
185+ mkdir a
186+
187+Because we have a whiteout for "a" already, Unionfs behaves as if "a"
188+doesn't exist, and thus will delete the whiteout and replace it with an
189+actual directory named "a".
190+
191+The problem now is that if you try to "ls" in the union, Unionfs will
192+perform its normal directory name unification, for *all* directories named
193+"a" in all branches. This will cause the file /a/f from branch 1 to
194+re-appear in the union's namespace, which violates Unix semantics.
195+
196+To avoid this problem, we have a different form of whiteouts for
197+directories, called "opaque directories" (same as BSD Union Mount does).
198+Whenever we replace a whiteout with a directory, that directory is marked as
199+opaque. In Unionfs 2.x, it means that we create a file named
200+/a/.wh.__dir_opaque in branch 0, after having created directory /a there.
201+When unionfs notices that a directory is opaque, it stops all namespace
202+operations (including merging readdir contents) at that opaque directory.
203+This prevents re-exposing names from masked out directories.
204+
205+
206+Duplicate Elimination:
207+======================
208+
209+It is possible for files on different branches to have the same name.
210+Unionfs then has to select which instance of the file to show to the user.
211+Given the fact that each branch has a priority associated with it, the
212+simplest solution is to take the instance from the highest priority
213+(numerically lowest value) and "hide" the others.
214+
215+
216+Unlinking:
217+=========
218+
219+Unlink operation on non-directory instances is optimized to remove the
220+maximum possible objects in case multiple underlying branches have the same
221+file name. The unlink operation will first try to delete file instances
222+from highest priority branch and then move further to delete from remaining
223+branches in order of their decreasing priority. Consider a case (F..D..F),
224+where F is a file and D is a directory of the same name; here, some
225+intermediate branch could have an empty directory instance with the same
226+name, so this operation also tries to delete this directory instance and
227+proceed further to delete from next possible lower priority branch. The
228+unionfs unlink operation will smoothly delete the files with same name from
229+all possible underlying branches. If an error occurs, it creates a
230+whiteout in the highest priority branch that will hide the file instances in
231+the remaining branches. An error could occur either if an unlink operation in
232+any of the underlying branches failed or if a branch has no write permission.
233+
234+This unlinking policy is known as "delete all" and it has the benefit of
235+overall reducing the number of inodes used by duplicate files, and further
236+reducing the total number of inodes consumed by whiteouts. The cost is of
237+extra processing, but testing shows this extra processing is well worth the
238+savings.
239+
240+
241+Copyup:
242+=======
243+
244+When a change is made to the contents of a file's data or meta-data, they
245+have to be stored somewhere. The best way is to create a copy of the
246+original file on a branch that is writable, and then redirect the write
247+through to this copy. The copy must be made on a higher priority branch so
248+that lookup and readdir return this newer "version" of the file rather than
249+the original (see duplicate elimination).
250+
251+An entire unionfs mount can be read-only or read-write. If it's read-only,
252+then none of the branches will be written to, even if some of the branches
253+are physically writeable. If the unionfs mount is read-write, then the
254+leftmost (highest priority) branch must be writeable (for copyup to take
255+place); the remaining branches can be any mix of read-write and read-only.
256+
257+In a writeable mount, unionfs will create new files/dir in the leftmost
258+branch. If one tries to modify a file in a read-only branch/media, unionfs
259+will copyup the file to the leftmost branch and modify it there. If you try
260+to modify a file from a writeable branch which is not the leftmost branch,
261+then unionfs will modify it in that branch; this is useful if you, say,
262+unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
263+changes to specific package files to remain logically in the directory where
264+they came from.
265+
266+Cache Coherency:
267+================
268+
269+Unionfs users often want to be able to modify files and directories directly
270+on the lower branches, and have those changes be visible at the Unionfs
271+level. This means that data (e.g., pages) and meta-data (dentries, inodes,
272+open files, etc.) have to be synchronized between the upper and lower
273+layers. In other words, the newest changes from a layer below have to be
274+propagated to the Unionfs layer above. If the two layers are not in sync, a
275+cache incoherency ensues, which could lead to application failures and even
276+oopses. The Linux kernel, however, has a rather limited set of mechanisms
277+to ensure this inter-layer cache coherency---so Unionfs has to do most of
278+the hard work on its own.
279+
280+Maintaining Invariants:
281+
282+The way Unionfs ensures cache coherency is as follows. At each entry point
283+to a Unionfs file system method, we call a utility function to validate the
284+primary objects of this method. Generally, we call unionfs_file_revalidate
285+on open files, and __unionfs_d_revalidate_chain on dentries (which also
286+validates inodes). These utility functions check to see whether the upper
287+Unionfs object is in sync with any of the lower objects that it represents.
288+The checks we perform include whether the Unionfs superblock has a newer
289+generation number, or if any of the lower objects mtime's or ctime's are
290+newer. (Note: generation numbers change when branch-management commands are
291+issued, so in a way, maintaining cache coherency is also very important for
292+branch-management.) If indeed we determine that any Unionfs object is no
293+longer in sync with its lower counterparts, then we rebuild that object
294+similarly to how we do so for branch-management.
295+
296+While rebuilding Unionfs's objects, we also purge any page mappings and
297+truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
298+ensure that Unionfs will re-get the newer data from the lower branches. We
299+perform this purging only if the Unionfs operation in question is a reading
300+operation; if Unionfs is performing a data writing operation (e.g., ->write,
301+->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
302+because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
303+considered more authoritative anyway, as they are newer and will overwrite
304+any lower pages.
305+
306+Unionfs maintains the following important invariant regarding mtime's,
307+ctime's, and atime's: the upper inode object's times are the max() of all of
308+the lower ones. For non-directory objects, there's only one object below,
309+so the mapping is simple; for directory objects, there could be multiple
310+lower objects and we have to sync up with the newest one of all the lower
311+ones. This invariant is important to maintain, especially for directories
312+(besides, we need this to be POSIX compliant). A union could comprise
313+multiple writable branches, each of which could change. If we don't reflect
314+the newest possible mtime/ctime, some applications could fail. For example,
315+NFSv2/v3 exports check for newer directory mtimes on the server to determine
316+if the client-side attribute cache should be purged.
317+
318+To maintain these important invariants, of course, Unionfs carefully
319+synchronizes upper and lower times in various places. For example, if we
320+copy-up a file to a top-level branch, the parent directory where the file
321+was copied up to will now have a new mtime: so after a successful copy-up,
322+we sync up with the new top-level branch's parent directory mtime.
323+
324+Implementation:
325+
326+This cache-coherency implementation is efficient because it defers any
327+synchronizing between the upper and lower layers until absolutely needed.
328+Consider, for example, a common situation where users perform a lot of lower
329+changes, such as untarring a whole package. While these take place,
330+typically the user doesn't access the files via Unionfs; only after the
331+lower changes are done, does the user try to access the lower files. With
332+our cache-coherency implementation, the entirety of the changes to the lower
333+branches will not result in a single CPU cycle spent at the Unionfs level
334+until the user invokes a system call that goes through Unionfs.
335+
336+We have considered two alternate cache-coherency designs. (1) Using the
337+dentry/inode notify functionality to register interest in finding out about
338+any lower changes. This is a somewhat limited and also a heavy-handed
339+approach which could result in many notifications to the Unionfs layer upon
340+each small change at the lower layer (imagine a file being modified multiple
341+times in rapid succession). (2) Rewriting the VFS to support explicit
342+callbacks from lower objects to upper objects. We began exploring such an
343+implementation, but found it to be very complicated--it would have resulted
344+in massive VFS/MM changes which are unlikely to be accepted by the LKML
345+community. We therefore believe that our current cache-coherency design and
346+implementation represent the best approach at this time.
347+
348+Limitations:
349+
350+Our implementation works in that as long as a user process will have caused
351+Unionfs to be called, directly or indirectly, even to just do
352+->d_revalidate; then we will have purged the current Unionfs data and the
353+process will see the new data. For example, a process that continually
354+re-reads the same file's data will see the NEW data as soon as the lower
355+file had changed, upon the next read(2) syscall (even if the file is still
356+open!) However, this doesn't work when the process re-reads the open file's
357+data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
358+it). Once we respond to ->readpage(s), then the kernel maps the page into
359+the process's address space and there doesn't appear to be a way to force
360+the kernel to invalidate those pages/mappings, and force the process to
361+re-issue ->readpage. If there's a way to invalidate active mappings and
362+force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
363+the trick).
364+
365+Our current Unionfs code has to perform many file-revalidation calls. It
366+would be really nice if the VFS would export an optional file system hook
367+->file_revalidate (similarly to dentry->d_revalidate) that will be called
368+before each VFS op that has a "struct file" in it.
369+
370+Certain file systems have micro-second granularity (or better) for inode
371+times, and asynchronous actions could cause those times to change with some
372+small delay. In such cases, Unionfs may see a changed inode time that only
373+differs by a tiny fraction of a second: such a change may be a false
374+positive indication that the lower object has changed, whereas if unionfs
375+waits a little longer, that false indication will not be seen. (These false
376+positives are harmless, because they would at most cause unionfs to
377+re-validate an object that may need no revalidation, and print a debugging
378+message that clutters the console/logs.) Therefore, to minimize the chances
379+of these situations, we delay the detection of changed times by a small
380+factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
381+seconds, as does NFS). This means that we will detect the change, only a
382+couple of seconds later, if indeed the time change persists in the lower
383+file object. This delayed detection has an added performance benefit: we
384+reduce the number of times that unionfs has to revalidate objects, in case
385+there's a lot of concurrent activity on both the upper and lower objects,
386+for the same file(s). Lastly, this delayed time attribute detection is
387+similar to how NFS clients operate (e.g., acregmin).
388+
389+Finally, there is no way currently in Linux to prevent lower directories
390+from being moved around (i.e., topology changes); there's no way to prevent
391+modifications to directory sub-trees of whole file systems which are mounted
392+read-write. It is therefore possible for in-flight operations in unionfs to
393+take place, while a lower directory is being moved around. Therefore, if
394+you try to, say, create a new file in a directory through unionfs, while the
395+directory is being moved around directly, then the new file may get created
396+in the new location where that directory was moved to. This is somewhat
397+similar to the behaviour in NFS: an NFS client could be creating a new file while
398+the NFS server is moving the directory around; the file will get successfully
399+created in the new location. (The one exception in unionfs is that if the
400+branch is marked read-only by unionfs, then a copyup will take place.)
401+
402+For more information, see <http://unionfs.filesystems.org/>.
403diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
404new file mode 100644
405index 0000000..f4b7e7e
406--- /dev/null
407+++ b/Documentation/filesystems/unionfs/issues.txt
408@@ -0,0 +1,28 @@
409+KNOWN Unionfs 2.x ISSUES:
410+=========================
411+
412+1. Unionfs should not use lookup_one_len() on the underlying f/s as it
413+ confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
414+ lower file-system, this eliminates part of the problem. The remaining
415+ calls to lookup_one_len may need to be changed to pass an intent. We are
416+ currently introducing VFS changes to fs/namei.c's do_path_lookup() to
417+ allow proper file lookup and opening in stackable file systems.
418+
419+2. Lockdep (a debugging feature) isn't aware of stacking, and so it
420+ incorrectly complains about locking problems. The problem boils down to
421+ this: Lockdep considers all objects of a certain type to be in the same
422+ class, for example, all inodes. Lockdep doesn't like to see a lock held
423+ on two inodes within the same task, and warns that it could lead to a
424+ deadlock. However, stackable file systems do precisely that: they lock
425+ an upper object, and then a lower object, in a strict order to avoid
426+ locking problems; in addition, Unionfs, as a fan-out file system, may
427+ have to lock several lower inodes. We are currently looking into Lockdep
428+ to see how to make it aware of stackable file systems. For now, we
429+ temporarily disable lockdep when calling vfs methods on lower objects,
430+ but only for those places where lockdep complained. While this solution
431+ may seem unclean, it is not without precedent: other places in the kernel
432+ also do similar temporary disabling, of course after carefully having
433+ checked that it is the right thing to do. Anyway, if you get any warnings
434+ from Lockdep, please report them to the Unionfs maintainers.
435+
436+For more information, see <http://unionfs.filesystems.org/>.
437diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
438new file mode 100644
439index 0000000..e20bb82
440--- /dev/null
441+++ b/Documentation/filesystems/unionfs/rename.txt
442@@ -0,0 +1,31 @@
443+Rename is a complex beast. The following table shows which rename(2) operations
444+should succeed and which should fail.
445+
446+o: success
447+E: error (either unionfs or vfs)
448+X: EXDEV
449+
450+none = file does not exist
451+file = file is a file
452+dir = file is an empty directory
453+child= file is a non-empty directory
454+wh = file is a directory containing only whiteouts; this makes it logically
455+ empty
456+
457+ none file dir child wh
458+file o o E E E
459+dir o E o E o
460+child X E X E X
461+wh o E o E o
462+
463+
464+Renaming directories:
465+=====================
466+
467+Whenever an empty (either physically or logically) directory is being renamed,
468+the following sequence of events should take place:
469+
470+1) Remove whiteouts from both source and destination directory
471+2) Rename source to destination
472+3) Make destination opaque to prevent anything under it from showing up
473+
474diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
475new file mode 100644
476index 0000000..1adde69
477--- /dev/null
478+++ b/Documentation/filesystems/unionfs/usage.txt
479@@ -0,0 +1,134 @@
480+Unionfs is a stackable unification file system, which can appear to merge
481+the contents of several directories (branches), while keeping their physical
482+content separate. Unionfs is useful for unified source tree management,
483+merged contents of split CD-ROM, merged separate software package
484+directories, data grids, and more. Unionfs allows any mix of read-only and
485+read-write branches, as well as insertion and deletion of branches anywhere
486+in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
487+duplicates, partial-error conditions, and more.
488+
489+GENERAL SYNTAX
490+==============
491+
492+# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
493+
494+OPTIONS can be any legal combination of:
495+
496+- ro # mount file system read-only
497+- rw # mount file system read-write
498+- remount # remount the file system (see Branch Management below)
499+- incgen # increment generation no. (see Cache Consistency below)
500+
501+BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
502+option, or (2) a list of individual branch manipulation commands, combined
503+with the "remount" option, and is further described in the "Branch
504+Management" section below.
505+
506+The syntax for the "dirs=" mount option is:
507+
508+ dirs=branch[=ro|=rw][:...]
509+
510+The "dirs=" option takes a colon-delimited list of directories to compose
511+the union, with an optional branch mode for each of those directories.
512+Directories that come earlier (specified first, on the left) in the list
513+have a higher precedence than those which come later. Additionally,
514+read-only or read-write permissions of the branch can be specified by
515+appending =ro or =rw (default) to each directory. See the Copyup section in
516+concepts.txt, for a description of Unionfs's behavior when mixing read-only
517+and read-write branches and mounts.
518+
519+Syntax:
520+
521+ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
522+
523+Example:
524+
525+ dirs=/writable_branch=rw:/read-only_branch=ro
526+
527+
528+BRANCH MANAGEMENT
529+=================
530+
531+Once you mount your union for the first time, using the "dirs=" option, you
532+can then change the union's overall mode or reconfigure the branches, using
533+the remount option, as follows.
534+
535+To downgrade a union from read-write to read-only:
536+
537+# mount -t unionfs -o remount,ro none MOUNTPOINT
538+
539+To upgrade a union from read-only to read-write:
540+
541+# mount -t unionfs -o remount,rw none MOUNTPOINT
542+
543+To delete a branch /foo, regardless where it is in the current union:
544+
545+# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
546+
547+To insert (add) a branch /foo before /bar:
548+
549+# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
550+
551+To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
552+
553+# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
554+
555+To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
556+new highest-priority branch), you can use the above syntax, or use a short
557+hand version as follows:
558+
559+# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
560+
561+To append a branch to the very end (new lowest-priority branch):
562+
563+# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
564+
565+To append a branch to the very end (new lowest-priority branch), in
566+read-only mode:
567+
568+# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
569+
570+Finally, to change the mode of one existing branch, say /foo, from read-only
571+to read-write, and change /bar from read-write to read-only:
572+
573+# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
574+
575+Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
576+then Unionfs won't have any writable place for copyups to take place.
577+Moreover, the VFS can get confused when it tries to modify something in a
578+file system mounted read-write, but isn't permitted to write to it.
579+Instead, you should set the whole union as readonly, as described above.
580+If, however, you must set the leftmost branch as readonly, perhaps so you
581+can get a snapshot of it at a point in time, then you should insert a new
582+writable top-level branch, and mark the one you want as readonly. This can
583+be accomplished as follows, assuming that /foo is your current leftmost
584+branch:
585+
586+# mount -t tmpfs -o size=NNN /new
587+# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
588+<do what you want safely in /foo>
589+# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
590+<check if there's anything in /new you want to preserve>
591+# umount /new
592+
593+CACHE CONSISTENCY
594+=================
595+
596+If you modify any file on any of the lower branches directly, while there is
597+a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
598+to purge its caches and re-get the objects. To do that, you have to
599+increment the generation number of the superblock using the following
600+command:
601+
602+# mount -t unionfs -o remount,incgen none MOUNTPOINT
603+
604+Note that the older way of incrementing the generation number using an
605+ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general
606+are not encouraged. Plus, an ioctl is a per-file concept, whereas the
607+generation number is a per-file-system concept. Worse, such an ioctl
608+requires an open file, which then has to be invalidated by the very nature
609+of the generation number increase (read: the old generation increase ioctl
610+was pretty racy).
611+
612+
613+For more information, see <http://unionfs.filesystems.org/>.
614diff --git a/MAINTAINERS b/MAINTAINERS
615index 69f19f1..fd88a30 100644
616--- a/MAINTAINERS
617+++ b/MAINTAINERS
618@@ -6319,6 +6319,14 @@ F: Documentation/cdrom/
619 F: drivers/cdrom/cdrom.c
620 F: include/linux/cdrom.h
621
622+UNIONFS
623+P: Erez Zadok
624+M: ezk@cs.sunysb.edu
625+L: unionfs@filesystems.org
626+W: http://unionfs.filesystems.org/
627+T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
628+S: Maintained
629+
630 UNSORTED BLOCK IMAGES (UBI)
631 M: Artem Bityutskiy <dedekind1@gmail.com>
632 W: http://www.linux-mtd.infradead.org/
633diff --git a/fs/Kconfig b/fs/Kconfig
634index f3aa9b0..0e6182c 100644
635--- a/fs/Kconfig
636+++ b/fs/Kconfig
637@@ -170,6 +170,7 @@ if MISC_FILESYSTEMS
638 source "fs/adfs/Kconfig"
639 source "fs/affs/Kconfig"
640 source "fs/ecryptfs/Kconfig"
641+source "fs/unionfs/Kconfig"
642 source "fs/hfs/Kconfig"
643 source "fs/hfsplus/Kconfig"
644 source "fs/befs/Kconfig"
645diff --git a/fs/Makefile b/fs/Makefile
646index fb68c2b..8ca9290 100644
647--- a/fs/Makefile
648+++ b/fs/Makefile
649@@ -83,6 +83,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/
650 obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
651 obj-$(CONFIG_HFS_FS) += hfs/
652 obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
653+obj-$(CONFIG_UNION_FS) += unionfs/
654 obj-$(CONFIG_VXFS_FS) += freevxfs/
655 obj-$(CONFIG_NFS_FS) += nfs/
656 obj-$(CONFIG_EXPORTFS) += exportfs/
657diff --git a/fs/namei.c b/fs/namei.c
658index e3c4f11..d9f99a4 100644
659--- a/fs/namei.c
660+++ b/fs/namei.c
661@@ -578,6 +578,7 @@ void release_open_intent(struct nameidata *nd)
662 fput(file);
663 }
664 }
665+EXPORT_SYMBOL_GPL(release_open_intent);
666
667 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
668 {
669@@ -1819,6 +1820,42 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
670 return __lookup_hash(&this, base, NULL);
671 }
672
673+/* like lookup_one_len(), but pass nameidata from caller (useful for NFS) */
674+struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
675+ int len, struct nameidata *nd)
676+{
677+ struct qstr this;
678+ unsigned long hash;
679+ unsigned int c;
680+
681+ WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
682+
683+ this.name = name;
684+ this.len = len;
685+ if (!len)
686+ return ERR_PTR(-EACCES);
687+
688+ hash = init_name_hash();
689+ while (len--) {
690+ c = *(const unsigned char *)name++;
691+ if (c == '/' || c == '\0')
692+ return ERR_PTR(-EACCES);
693+ hash = partial_name_hash(c, hash);
694+ }
695+ this.hash = end_name_hash(hash);
696+ /*
697+ * See if the low-level filesystem might want
698+ * to use its own hash..
699+ */
700+ if (base->d_flags & DCACHE_OP_HASH) {
701+ int err = base->d_op->d_hash(base, base->d_inode, &this);
702+ if (err < 0)
703+ return ERR_PTR(err);
704+ }
705+
706+ return __lookup_hash(&this, base, nd);
707+}
708+
709 int user_path_at(int dfd, const char __user *name, unsigned flags,
710 struct path *path)
711 {
712@@ -3422,6 +3459,7 @@ EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
713 EXPORT_SYMBOL(getname);
714 EXPORT_SYMBOL(lock_rename);
715 EXPORT_SYMBOL(lookup_one_len);
716+EXPORT_SYMBOL(lookup_one_len_nd);
717 EXPORT_SYMBOL(page_follow_link_light);
718 EXPORT_SYMBOL(page_put_link);
719 EXPORT_SYMBOL(page_readlink);
720diff --git a/fs/splice.c b/fs/splice.c
721index 50a5d97..a3af841 100644
722--- a/fs/splice.c
723+++ b/fs/splice.c
724@@ -1081,8 +1081,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
725 /*
726 * Attempt to initiate a splice from pipe to file.
727 */
728-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
729- loff_t *ppos, size_t len, unsigned int flags)
730+long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
731+ loff_t *ppos, size_t len, unsigned int flags)
732 {
733 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
734 loff_t *, size_t, unsigned int);
735@@ -1105,13 +1105,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
736
737 return splice_write(pipe, out, ppos, len, flags);
738 }
739+EXPORT_SYMBOL_GPL(vfs_splice_from);
740
741 /*
742 * Attempt to initiate a splice from a file to a pipe.
743 */
744-static long do_splice_to(struct file *in, loff_t *ppos,
745- struct pipe_inode_info *pipe, size_t len,
746- unsigned int flags)
747+long vfs_splice_to(struct file *in, loff_t *ppos,
748+ struct pipe_inode_info *pipe, size_t len,
749+ unsigned int flags)
750 {
751 ssize_t (*splice_read)(struct file *, loff_t *,
752 struct pipe_inode_info *, size_t, unsigned int);
753@@ -1131,6 +1132,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
754
755 return splice_read(in, ppos, pipe, len, flags);
756 }
757+EXPORT_SYMBOL_GPL(vfs_splice_to);
758
759 /**
760 * splice_direct_to_actor - splices data directly between two non-pipes
761@@ -1200,7 +1202,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
762 size_t read_len;
763 loff_t pos = sd->pos, prev_pos = pos;
764
765- ret = do_splice_to(in, &pos, pipe, len, flags);
766+ ret = vfs_splice_to(in, &pos, pipe, len, flags);
767 if (unlikely(ret <= 0))
768 goto out_release;
769
770@@ -1259,8 +1261,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
771 {
772 struct file *file = sd->u.file;
773
774- return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
775- sd->flags);
776+ return vfs_splice_from(pipe, file, &file->f_pos, sd->total_len,
777+ sd->flags);
778 }
779
780 /**
781@@ -1345,7 +1347,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
782 } else
783 off = &out->f_pos;
784
785- ret = do_splice_from(ipipe, out, off, len, flags);
786+ ret = vfs_splice_from(ipipe, out, off, len, flags);
787
788 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
789 ret = -EFAULT;
790@@ -1365,7 +1367,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
791 } else
792 off = &in->f_pos;
793
794- ret = do_splice_to(in, off, opipe, len, flags);
795+ ret = vfs_splice_to(in, off, opipe, len, flags);
796
797 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
798 ret = -EFAULT;
799diff --git a/fs/stack.c b/fs/stack.c
800index 4a6f7f4..7eeef12 100644
801--- a/fs/stack.c
802+++ b/fs/stack.c
803@@ -1,8 +1,20 @@
804+/*
805+ * Copyright (c) 2006-2009 Erez Zadok
806+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
807+ * Copyright (c) 2006-2009 Stony Brook University
808+ * Copyright (c) 2006-2009 The Research Foundation of SUNY
809+ *
810+ * This program is free software; you can redistribute it and/or modify
811+ * it under the terms of the GNU General Public License version 2 as
812+ * published by the Free Software Foundation.
813+ */
814+
815 #include <linux/module.h>
816 #include <linux/fs.h>
817 #include <linux/fs_stack.h>
818
819-/* does _NOT_ require i_mutex to be held.
820+/*
821+ * does _NOT_ require i_mutex to be held.
822 *
823 * This function cannot be inlined since i_size_{read,write} is rather
824 * heavy-weight on 32-bit systems
825diff --git a/fs/unionfs/Kconfig b/fs/unionfs/Kconfig
826new file mode 100644
827index 0000000..f3c1ac4
828--- /dev/null
829+++ b/fs/unionfs/Kconfig
830@@ -0,0 +1,24 @@
831+config UNION_FS
832+ tristate "Union file system (EXPERIMENTAL)"
833+ depends on EXPERIMENTAL
834+ help
835+ Unionfs is a stackable unification file system, which appears to
836+ merge the contents of several directories (branches), while keeping
837+ their physical content separate.
838+
839+ See <http://unionfs.filesystems.org> for details
840+
841+config UNION_FS_XATTR
842+ bool "Unionfs extended attributes"
843+ depends on UNION_FS
844+ help
845+ Extended attributes are name:value pairs associated with inodes by
846+ the kernel or by users (see the attr(5) manual page).
847+
848+ If unsure, say N.
849+
850+config UNION_FS_DEBUG
851+ bool "Debug Unionfs"
852+ depends on UNION_FS
853+ help
854+ If you say Y here, you can turn on debugging output from Unionfs.
855diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
856new file mode 100644
857index 0000000..3e31847
858--- /dev/null
859+++ b/fs/unionfs/Makefile
860@@ -0,0 +1,17 @@
861+UNIONFS_VERSION="2.5.9 (for 2.6.39-rc5)"
862+
863+ccflags-y += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
864+
865+obj-$(CONFIG_UNION_FS) += unionfs.o
866+
867+unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
868+ rdstate.o copyup.o dirhelper.o rename.o unlink.o \
869+ lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o
870+
871+unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
872+
873+unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
874+
875+ifeq ($(CONFIG_UNION_FS_DEBUG),y)
876+ccflags-y += -DDEBUG
877+endif
878diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
879new file mode 100644
880index 0000000..9f63b1c
881--- /dev/null
882+++ b/fs/unionfs/commonfops.c
883@@ -0,0 +1,898 @@
884+/*
885+ * Copyright (c) 2003-2011 Erez Zadok
886+ * Copyright (c) 2003-2006 Charles P. Wright
887+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
888+ * Copyright (c) 2005-2006 Junjiro Okajima
889+ * Copyright (c) 2005 Arun M. Krishnakumar
890+ * Copyright (c) 2004-2006 David P. Quigley
891+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
892+ * Copyright (c) 2003 Puja Gupta
893+ * Copyright (c) 2003 Harikesavan Krishnan
894+ * Copyright (c) 2003-2011 Stony Brook University
895+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
896+ *
897+ * This program is free software; you can redistribute it and/or modify
898+ * it under the terms of the GNU General Public License version 2 as
899+ * published by the Free Software Foundation.
900+ */
901+
902+#include "union.h"
903+
904+/*
905+ * 1) Copyup the file to a temp name, '.unionfs<original inode#><counter>'
906+ * (the naming is obviously stolen from NFS's silly rename)
907+ * 2) Unlink the lower dentry, bringing it to the state of an unlinked file
908+ */
909+static int copyup_deleted_file(struct file *file, struct dentry *dentry,
910+ struct dentry *parent, int bstart, int bindex)
911+{
912+ static unsigned int counter;
913+ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
914+ const int countersize = sizeof(counter) * 2;
915+ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
916+ char name[nlen + 1];
917+ int err;
918+ struct dentry *tmp_dentry = NULL;
919+ struct dentry *lower_dentry;
920+ struct dentry *lower_dir_dentry = NULL;
921+
922+ lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
923+
924+ sprintf(name, ".unionfs%*.*lx",
925+ i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
926+
927+ /*
928+ * Loop, looking for an unused temp name to copyup to.
929+ *
930+ * It's somewhat silly that we look for a free temp name in the
931+ * source branch (bstart) instead of the dest branch (bindex), where
932+ * the final name will be created. We _will_ catch it if somehow
933+ * the name exists in the dest branch, but it'd be nice to catch it
934+ * sooner rather than later.
935+ */
936+retry:
937+ tmp_dentry = NULL;
938+ do {
939+ char *suffix = name + nlen - countersize;
940+
941+ dput(tmp_dentry);
942+ counter++;
943+ sprintf(suffix, "%*.*x", countersize, countersize, counter);
944+
945+ pr_debug("unionfs: trying to rename %s to %s\n",
946+ dentry->d_name.name, name);
947+
948+ tmp_dentry = lookup_lck_len(name, lower_dentry->d_parent,
949+ nlen);
950+ if (IS_ERR(tmp_dentry)) {
951+ err = PTR_ERR(tmp_dentry);
952+ goto out;
953+ }
954+ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
955+ dput(tmp_dentry);
956+
957+ err = copyup_named_file(parent->d_inode, file, name, bstart, bindex,
958+ i_size_read(file->f_path.dentry->d_inode));
959+ if (err) {
960+ if (unlikely(err == -EEXIST))
961+ goto retry;
962+ goto out;
963+ }
964+
965+ /* bring it to the same state as an unlinked file */
966+ lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
967+ if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
968+ atomic_inc(&lower_dentry->d_inode->i_count);
969+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
970+ lower_dentry->d_inode);
971+ }
972+ lower_dir_dentry = lock_parent(lower_dentry);
973+ err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
974+ unlock_dir(lower_dir_dentry);
975+
976+out:
977+ if (!err)
978+ unionfs_check_dentry(dentry);
979+ return err;
980+}
981+
982+/*
983+ * put all references held by upper struct file and free the lower file
984+ * pointer array and the saved branch ID array
985+ */
986+static void cleanup_file(struct file *file)
987+{
988+ int bindex, bstart, bend;
989+ struct file **lower_files;
990+ struct file *lower_file;
991+ struct super_block *sb = file->f_path.dentry->d_sb;
992+
993+ lower_files = UNIONFS_F(file)->lower_files;
994+ bstart = fbstart(file);
995+ bend = fbend(file);
996+
997+ for (bindex = bstart; bindex <= bend; bindex++) {
998+ int i; /* holds (possibly) updated branch index */
999+ int old_bid;
1000+
1001+ lower_file = unionfs_lower_file_idx(file, bindex);
1002+ if (!lower_file)
1003+ continue;
1004+
1005+ /*
1006+ * Find new index of matching branch with an open
1007+ * file, since branches could have been added or
1008+ * deleted causing the one with open files to shift.
1009+ */
1010+ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
1011+ i = branch_id_to_idx(sb, old_bid);
1012+ if (unlikely(i < 0)) {
1013+ printk(KERN_ERR "unionfs: no superblock for "
1014+ "file %p\n", file);
1015+ continue;
1016+ }
1017+
1018+ /* decrement count of open files */
1019+ branchput(sb, i);
1020+ /*
1021+ * fput will perform an mntput for us on the correct branch.
1022+ * Although we're using the file's old branch configuration,
1023+ * bindex, which is the old index, correctly points to the
1024+ * right branch in the file's branch list. In other words,
1025+ * we're going to mntput the correct branch even if branches
1026+ * have been added/removed.
1027+ */
1028+ fput(lower_file);
1029+ UNIONFS_F(file)->lower_files[bindex] = NULL;
1030+ UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
1031+ }
1032+
1033+ UNIONFS_F(file)->lower_files = NULL;
1034+ kfree(lower_files);
1035+ kfree(UNIONFS_F(file)->saved_branch_ids);
1036+ /* set to NULL because caller needs to know whether to kfree on error */
1037+ UNIONFS_F(file)->saved_branch_ids = NULL;
1038+}
1039+
1040+/* open all lower files for a given file */
1041+static int open_all_files(struct file *file)
1042+{
1043+ int bindex, bstart, bend, err = 0;
1044+ struct file *lower_file;
1045+ struct dentry *lower_dentry;
1046+ struct dentry *dentry = file->f_path.dentry;
1047+ struct super_block *sb = dentry->d_sb;
1048+
1049+ bstart = dbstart(dentry);
1050+ bend = dbend(dentry);
1051+
1052+ for (bindex = bstart; bindex <= bend; bindex++) {
1053+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1054+ if (!lower_dentry)
1055+ continue;
1056+
1057+ dget(lower_dentry);
1058+ unionfs_mntget(dentry, bindex);
1059+ branchget(sb, bindex);
1060+
1061+ lower_file =
1062+ dentry_open(lower_dentry,
1063+ unionfs_lower_mnt_idx(dentry, bindex),
1064+ file->f_flags, current_cred());
1065+ if (IS_ERR(lower_file)) {
1066+ branchput(sb, bindex);
1067+ err = PTR_ERR(lower_file);
1068+ goto out;
1069+ } else {
1070+ unionfs_set_lower_file_idx(file, bindex, lower_file);
1071+ }
1072+ }
1073+out:
1074+ return err;
1075+}
1076+
1077+/* open the highest priority file for a given upper file */
1078+static int open_highest_file(struct file *file, bool willwrite)
1079+{
1080+ int bindex, bstart, bend, err = 0;
1081+ struct file *lower_file;
1082+ struct dentry *lower_dentry;
1083+ struct dentry *dentry = file->f_path.dentry;
1084+ struct dentry *parent = dget_parent(dentry);
1085+ struct inode *parent_inode = parent->d_inode;
1086+ struct super_block *sb = dentry->d_sb;
1087+
1088+ bstart = dbstart(dentry);
1089+ bend = dbend(dentry);
1090+
1091+ lower_dentry = unionfs_lower_dentry(dentry);
1092+ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
1093+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1094+ err = copyup_file(parent_inode, file, bstart, bindex,
1095+ i_size_read(dentry->d_inode));
1096+ if (!err)
1097+ break;
1098+ }
1099+ atomic_set(&UNIONFS_F(file)->generation,
1100+ atomic_read(&UNIONFS_I(dentry->d_inode)->
1101+ generation));
1102+ goto out;
1103+ }
1104+
1105+ dget(lower_dentry);
1106+ unionfs_mntget(dentry, bstart);
1107+ lower_file = dentry_open(lower_dentry,
1108+ unionfs_lower_mnt_idx(dentry, bstart),
1109+ file->f_flags, current_cred());
1110+ if (IS_ERR(lower_file)) {
1111+ err = PTR_ERR(lower_file);
1112+ goto out;
1113+ }
1114+ branchget(sb, bstart);
1115+ unionfs_set_lower_file(file, lower_file);
1116+ /* Fix up the position. */
1117+ lower_file->f_pos = file->f_pos;
1118+
1119+ memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
1120+out:
1121+ dput(parent);
1122+ return err;
1123+}
1124+
1125+/* perform a delayed copyup of a read-write file on a read-only branch */
1126+static int do_delayed_copyup(struct file *file, struct dentry *parent)
1127+{
1128+ int bindex, bstart, bend, err = 0;
1129+ struct dentry *dentry = file->f_path.dentry;
1130+ struct inode *parent_inode = parent->d_inode;
1131+
1132+ bstart = fbstart(file);
1133+ bend = fbend(file);
1134+
1135+ BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
1136+
1137+ unionfs_check_file(file);
1138+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1139+ if (!d_deleted(dentry))
1140+ err = copyup_file(parent_inode, file, bstart,
1141+ bindex,
1142+ i_size_read(dentry->d_inode));
1143+ else
1144+ err = copyup_deleted_file(file, dentry, parent,
1145+ bstart, bindex);
1146+ /* if succeeded, set lower open-file flags and break */
1147+ if (!err) {
1148+ struct file *lower_file;
1149+ lower_file = unionfs_lower_file_idx(file, bindex);
1150+ lower_file->f_flags = file->f_flags;
1151+ break;
1152+ }
1153+ }
1154+ if (err || (bstart <= fbstart(file)))
1155+ goto out;
1156+ bend = fbend(file);
1157+ for (bindex = bstart; bindex <= bend; bindex++) {
1158+ if (unionfs_lower_file_idx(file, bindex)) {
1159+ branchput(dentry->d_sb, bindex);
1160+ fput(unionfs_lower_file_idx(file, bindex));
1161+ unionfs_set_lower_file_idx(file, bindex, NULL);
1162+ }
1163+ }
1164+ path_put_lowers(dentry, bstart, bend, false);
1165+ iput_lowers(dentry->d_inode, bstart, bend, false);
1166+ /* for reg file, we only open it "once" */
1167+ fbend(file) = fbstart(file);
1168+ dbend(dentry) = dbstart(dentry);
1169+ ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1170+
1171+out:
1172+ unionfs_check_file(file);
1173+ return err;
1174+}
1175+
1176+/*
1177+ * Helper function for unionfs_file_revalidate/locked.
1178+ * Expects dentry/parent to be locked already, and revalidated.
1179+ */
1180+static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry,
1181+ struct dentry *parent,
1182+ struct super_block *sb, int sbgen,
1183+ int dgen, bool willwrite)
1184+{
1185+ int fgen;
1186+ int bstart, bend, orig_brid;
1187+ int size;
1188+ int err = 0;
1189+
1190+ fgen = atomic_read(&UNIONFS_F(file)->generation);
1191+
1192+ /*
1193+ * There are two cases we are interested in. The first is if the
1194+ * generation is lower than the super-block. The second is if
1195+ * someone has copied up this file from underneath us, we also need
1196+ * to refresh things.
1197+ */
1198+ if ((d_deleted(dentry) && dbstart(dentry) >= fbstart(file)) ||
1199+ (sbgen <= fgen &&
1200+ dbstart(dentry) == fbstart(file) &&
1201+ unionfs_lower_file(file)))
1202+ goto out_may_copyup;
1203+
1204+ /* save orig branch ID */
1205+ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1206+
1207+ /* First we throw out the existing files. */
1208+ cleanup_file(file);
1209+
1210+ /* Now we reopen the file(s) as in unionfs_open. */
1211+ bstart = fbstart(file) = dbstart(dentry);
1212+ bend = fbend(file) = dbend(dentry);
1213+
1214+ size = sizeof(struct file *) * sbmax(sb);
1215+ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1216+ if (unlikely(!UNIONFS_F(file)->lower_files)) {
1217+ err = -ENOMEM;
1218+ goto out;
1219+ }
1220+ size = sizeof(int) * sbmax(sb);
1221+ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1222+ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1223+ err = -ENOMEM;
1224+ goto out;
1225+ }
1226+
1227+ if (S_ISDIR(dentry->d_inode->i_mode)) {
1228+ /* We need to open all the files. */
1229+ err = open_all_files(file);
1230+ if (err)
1231+ goto out;
1232+ } else {
1233+ int new_brid;
1234+ /* We only open the highest priority branch. */
1235+ err = open_highest_file(file, willwrite);
1236+ if (err)
1237+ goto out;
1238+ new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1239+ if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1240+ /*
1241+ * If we re-opened the file on a different branch
1242+ * than the original one, and this was due to a new
1243+ * branch inserted, then update the mnt counts of
1244+ * the old and new branches accordingly.
1245+ */
1246+ unionfs_mntget(dentry, bstart);
1247+ unionfs_mntput(sb->s_root,
1248+ branch_id_to_idx(sb, orig_brid));
1249+ }
1250+ /* regular files have only one open lower file */
1251+ fbend(file) = fbstart(file);
1252+ }
1253+ atomic_set(&UNIONFS_F(file)->generation,
1254+ atomic_read(&UNIONFS_I(dentry->d_inode)->generation));
1255+
1256+out_may_copyup:
1257+ /* Copyup on the first write to a file on a readonly branch. */
1258+ if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1259+ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1260+ is_robranch(dentry)) {
1261+ pr_debug("unionfs: do delay copyup of \"%s\"\n",
1262+ dentry->d_name.name);
1263+ err = do_delayed_copyup(file, parent);
1264+ /* regular files have only one open lower file */
1265+ if (!err && !S_ISDIR(dentry->d_inode->i_mode))
1266+ fbend(file) = fbstart(file);
1267+ }
1268+
1269+out:
1270+ if (err) {
1271+ kfree(UNIONFS_F(file)->lower_files);
1272+ kfree(UNIONFS_F(file)->saved_branch_ids);
1273+ }
1274+ return err;
1275+}
1276+
1277+/*
1278+ * Revalidate the struct file
1279+ * @file: file to revalidate
1280+ * @parent: parent dentry (locked by caller)
1281+ * @willwrite: true if caller may cause changes to the file; false otherwise.
1282+ * Caller must lock/unlock dentry's branch configuration.
1283+ */
1284+int unionfs_file_revalidate(struct file *file, struct dentry *parent,
1285+ bool willwrite)
1286+{
1287+ struct super_block *sb;
1288+ struct dentry *dentry;
1289+ int sbgen, dgen;
1290+ int err = 0;
1291+
1292+ dentry = file->f_path.dentry;
1293+ sb = dentry->d_sb;
1294+ verify_locked(dentry);
1295+ verify_locked(parent);
1296+
1297+ /*
1298+ * First revalidate the dentry inside struct file,
1299+ * but not unhashed dentries.
1300+ */
1301+ if (!d_deleted(dentry) &&
1302+ !__unionfs_d_revalidate(dentry, parent, willwrite)) {
1303+ err = -ESTALE;
1304+ goto out;
1305+ }
1306+
1307+ sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1308+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1309+
1310+ if (unlikely(sbgen > dgen)) { /* XXX: should never happen */
1311+ pr_debug("unionfs: failed to revalidate dentry (%s)\n",
1312+ dentry->d_name.name);
1313+ err = -ESTALE;
1314+ goto out;
1315+ }
1316+
1317+ err = __unionfs_file_revalidate(file, dentry, parent, sb,
1318+ sbgen, dgen, willwrite);
1319+out:
1320+ return err;
1321+}
1322+
1323+/* unionfs_open helper function: open a directory */
1324+static int __open_dir(struct inode *inode, struct file *file)
1325+{
1326+ struct dentry *lower_dentry;
1327+ struct file *lower_file;
1328+ int bindex, bstart, bend;
1329+ struct vfsmount *mnt;
1330+
1331+ bstart = fbstart(file) = dbstart(file->f_path.dentry);
1332+ bend = fbend(file) = dbend(file->f_path.dentry);
1333+
1334+ for (bindex = bstart; bindex <= bend; bindex++) {
1335+ lower_dentry =
1336+ unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1337+ if (!lower_dentry)
1338+ continue;
1339+
1340+ dget(lower_dentry);
1341+ unionfs_mntget(file->f_path.dentry, bindex);
1342+ mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex);
1343+ lower_file = dentry_open(lower_dentry, mnt, file->f_flags,
1344+ current_cred());
1345+ if (IS_ERR(lower_file))
1346+ return PTR_ERR(lower_file);
1347+
1348+ unionfs_set_lower_file_idx(file, bindex, lower_file);
1349+
1350+ /*
1351+ * The branchget goes after the open, because otherwise
1352+ * we would miss the reference on release.
1353+ */
1354+ branchget(inode->i_sb, bindex);
1355+ }
1356+
1357+ return 0;
1358+}
1359+
1360+/* unionfs_open helper function: open a file */
1361+static int __open_file(struct inode *inode, struct file *file,
1362+ struct dentry *parent)
1363+{
1364+ struct dentry *lower_dentry;
1365+ struct file *lower_file;
1366+ int lower_flags;
1367+ int bindex, bstart, bend;
1368+
1369+ lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1370+ lower_flags = file->f_flags;
1371+
1372+ bstart = fbstart(file) = dbstart(file->f_path.dentry);
1373+ bend = fbend(file) = dbend(file->f_path.dentry);
1374+
1375+ /*
1376+ * If the lower file exists on a branch that is_robranch() flags
1377+ * (presumably read-only), copy it up now or defer the copyup.
1378+ */
1379+ if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1380+ /*
1381+ * if the open will change the file, copy it up otherwise
1382+ * defer it.
1383+ */
1384+ if (lower_flags & O_TRUNC) {
1385+ int size = 0;
1386+ int err = -EROFS;
1387+
1388+ /* copyup the file */
1389+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1390+ err = copyup_file(parent->d_inode, file,
1391+ bstart, bindex, size);
1392+ if (!err) {
1393+ /* only one regular file open */
1394+ fbend(file) = fbstart(file);
1395+ break;
1396+ }
1397+ }
1398+ return err;
1399+ } else {
1400+ /*
1401+ * turn off writeable flags, to force delayed copyup
1402+ * by caller.
1403+ */
1404+ lower_flags &= ~(OPEN_WRITE_FLAGS);
1405+ }
1406+ }
1407+
1408+ dget(lower_dentry);
1409+
1410+ /*
1411+ * dentry_open will decrement mnt refcnt if err.
1412+ * otherwise fput() will do an mntput() for us upon file close.
1413+ */
1414+ unionfs_mntget(file->f_path.dentry, bstart);
1415+ lower_file =
1416+ dentry_open(lower_dentry,
1417+ unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1418+ lower_flags, current_cred());
1419+ if (IS_ERR(lower_file))
1420+ return PTR_ERR(lower_file);
1421+
1422+ unionfs_set_lower_file(file, lower_file);
1423+ branchget(inode->i_sb, bstart);
1424+
1425+ return 0;
1426+}
1427+
1428+int unionfs_open(struct inode *inode, struct file *file)
1429+{
1430+ int err = 0;
1431+ struct file *lower_file = NULL;
1432+ struct dentry *dentry = file->f_path.dentry;
1433+ struct dentry *parent;
1434+ int bindex = 0, bstart = 0, bend = 0;
1435+ int size;
1436+ int valid = 0;
1437+
1438+ unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1439+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1440+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1441+
1442+ /* don't open unhashed/deleted files */
1443+ if (d_deleted(dentry)) {
1444+ err = -ENOENT;
1445+ goto out_nofree;
1446+ }
1447+
1448+ /* XXX: should I change 'false' below to the 'willwrite' flag? */
1449+ valid = __unionfs_d_revalidate(dentry, parent, false);
1450+ if (unlikely(!valid)) {
1451+ err = -ESTALE;
1452+ goto out_nofree;
1453+ }
1454+
1455+ file->private_data =
1456+ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1457+ if (unlikely(!UNIONFS_F(file))) {
1458+ err = -ENOMEM;
1459+ goto out_nofree;
1460+ }
1461+ fbstart(file) = -1;
1462+ fbend(file) = -1;
1463+ atomic_set(&UNIONFS_F(file)->generation,
1464+ atomic_read(&UNIONFS_I(inode)->generation));
1465+
1466+ size = sizeof(struct file *) * sbmax(inode->i_sb);
1467+ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1468+ if (unlikely(!UNIONFS_F(file)->lower_files)) {
1469+ err = -ENOMEM;
1470+ goto out;
1471+ }
1472+ size = sizeof(int) * sbmax(inode->i_sb);
1473+ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1474+ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1475+ err = -ENOMEM;
1476+ goto out;
1477+ }
1478+
1479+ bstart = fbstart(file) = dbstart(dentry);
1480+ bend = fbend(file) = dbend(dentry);
1481+
1482+ /*
1483+ * open all directories and make the unionfs file struct point to
1484+ * these lower file structs
1485+ */
1486+ if (S_ISDIR(inode->i_mode))
1487+ err = __open_dir(inode, file); /* open a dir */
1488+ else
1489+ err = __open_file(inode, file, parent); /* open a file */
1490+
1491+ /* freeing the allocated resources, and fput the opened files */
1492+ if (err) {
1493+ for (bindex = bstart; bindex <= bend; bindex++) {
1494+ lower_file = unionfs_lower_file_idx(file, bindex);
1495+ if (!lower_file)
1496+ continue;
1497+
1498+ branchput(dentry->d_sb, bindex);
1499+ /* fput calls dput for lower_dentry */
1500+ fput(lower_file);
1501+ }
1502+ }
1503+
1504+out:
1505+ if (err) {
1506+ kfree(UNIONFS_F(file)->lower_files);
1507+ kfree(UNIONFS_F(file)->saved_branch_ids);
1508+ kfree(UNIONFS_F(file));
1509+ }
1510+out_nofree:
1511+ if (!err) {
1512+ unionfs_postcopyup_setmnt(dentry);
1513+ unionfs_copy_attr_times(inode);
1514+ unionfs_check_file(file);
1515+ unionfs_check_inode(inode);
1516+ }
1517+ unionfs_unlock_dentry(dentry);
1518+ unionfs_unlock_parent(dentry, parent);
1519+ unionfs_read_unlock(inode->i_sb);
1520+ return err;
1521+}
1522+
1523+/*
1524+ * release all lower object references & free the file info structure
1525+ *
1526+ * No need to grab sb info's rwsem.
1527+ */
1528+int unionfs_file_release(struct inode *inode, struct file *file)
1529+{
1530+ struct file *lower_file = NULL;
1531+ struct unionfs_file_info *fileinfo;
1532+ struct unionfs_inode_info *inodeinfo;
1533+ struct super_block *sb = inode->i_sb;
1534+ struct dentry *dentry = file->f_path.dentry;
1535+ struct dentry *parent;
1536+ int bindex, bstart, bend;
1537+ int err = 0;
1538+
1539+ /*
1540+ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
1541+ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
1542+ * has been causing false positives in file system stacking layers.
1543+ * In particular, our ->mmap is called after sys_mmap2 already holds
1544+ * mmap_sem, then we lock our own mutexes; but earlier, it's
1545+ * possible for lockdep to have locked our mutexes first, and then
1546+ * we call a lower ->readdir which could call might_fault. The
1547+ * different ordering of the locks is what lockdep complains about
1548+ * -- unnecessarily. Therefore, we have no choice but to
1549+ * temporarily turn off lockdep here. Note: the comments
1550+ * inside might_sleep also suggest that it would have been
1551+ * nicer to only annotate paths that need that might_lock_read.
1552+ */
1553+ lockdep_off();
1554+ unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1555+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1556+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1557+
1558+ /*
1559+ * We try to revalidate, but the VFS ignores return values
1560+ * from file->release, so we must always try to succeed here,
1561+ * including doing the kfree and dput below. So if revalidation
1562+ * failed, all we can do is keep going.
1563+ */
1564+ err = unionfs_file_revalidate(file, parent,
1565+ UNIONFS_F(file)->wrote_to_file);
1566+ if (!err)
1567+ unionfs_check_file(file);
1568+ fileinfo = UNIONFS_F(file);
1569+ BUG_ON(file->f_path.dentry->d_inode != inode);
1570+ inodeinfo = UNIONFS_I(inode);
1571+
1572+ /* fput all the lower files */
1573+ bstart = fbstart(file);
1574+ bend = fbend(file);
1575+
1576+ for (bindex = bstart; bindex <= bend; bindex++) {
1577+ lower_file = unionfs_lower_file_idx(file, bindex);
1578+
1579+ if (lower_file) {
1580+ unionfs_set_lower_file_idx(file, bindex, NULL);
1581+ fput(lower_file);
1582+ branchput(sb, bindex);
1583+ }
1584+
1585+ /* if there are no more refs to the dentry, dput it */
1586+ if (d_deleted(dentry)) {
1587+ dput(unionfs_lower_dentry_idx(dentry, bindex));
1588+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1589+ }
1590+ }
1591+
1592+ kfree(fileinfo->lower_files);
1593+ kfree(fileinfo->saved_branch_ids);
1594+
1595+ if (fileinfo->rdstate) {
1596+ fileinfo->rdstate->access = jiffies;
1597+ spin_lock(&inodeinfo->rdlock);
1598+ inodeinfo->rdcount++;
1599+ list_add_tail(&fileinfo->rdstate->cache,
1600+ &inodeinfo->readdircache);
1601+ mark_inode_dirty(inode);
1602+ spin_unlock(&inodeinfo->rdlock);
1603+ fileinfo->rdstate = NULL;
1604+ }
1605+ kfree(fileinfo);
1606+
1607+ unionfs_unlock_dentry(dentry);
1608+ unionfs_unlock_parent(dentry, parent);
1609+ unionfs_read_unlock(sb);
1610+ lockdep_on();
1611+ return err;
1612+}
1613+
1614+/* pass the ioctl to the lower fs; -ENOTTY if it has no ioctl handler */
1615+static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1616+{
1617+ struct file *lower_file;
1618+ long err;
1619+
1620+ lower_file = unionfs_lower_file(file);
1621+
1622+ err = -ENOTTY;
1623+ if (!lower_file || !lower_file->f_op)
1624+ goto out;
1625+ if (lower_file->f_op->unlocked_ioctl) {
1626+ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1627+#ifdef CONFIG_COMPAT
1628+ } else if (lower_file->f_op->compat_ioctl) {
1629+ /* compat_ioctl takes (file, cmd, arg), like unlocked_ioctl */
1630+ err = lower_file->f_op->compat_ioctl(lower_file,
1631+ cmd, arg);
1632+#endif
1633+ }
1634+
1635+out:
1636+ return err;
1637+}
1638+
1639+/*
1640+ * return to user-space the branch indices containing the file in question
1641+ *
1642+ * We use fd_set and therefore the number of branches is limited to
1643+ * FD_SETSIZE, which is currently 1024 - plenty for most people
1644+ */
1645+static int unionfs_ioctl_queryfile(struct file *file, struct dentry *parent,
1646+ unsigned int cmd, unsigned long arg)
1647+{
1648+ int err = 0;
1649+ fd_set branchlist;
1650+ int bstart = 0, bend = 0, bindex = 0;
1651+ int orig_bstart, orig_bend;
1652+ struct dentry *dentry, *lower_dentry;
1653+ struct vfsmount *mnt;
1654+
1655+ dentry = file->f_path.dentry;
1656+ orig_bstart = dbstart(dentry);
1657+ orig_bend = dbend(dentry);
1658+ err = unionfs_partial_lookup(dentry, parent);
1659+ if (err)
1660+ goto out;
1661+ bstart = dbstart(dentry);
1662+ bend = dbend(dentry);
1663+
1664+ FD_ZERO(&branchlist);
1665+
1666+ for (bindex = bstart; bindex <= bend; bindex++) {
1667+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1668+ if (!lower_dentry)
1669+ continue;
1670+ if (likely(lower_dentry->d_inode))
1671+ FD_SET(bindex, &branchlist);
1672+ /* purge any lower objects after partial_lookup */
1673+ if (bindex < orig_bstart || bindex > orig_bend) {
1674+ dput(lower_dentry);
1675+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1676+ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1677+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1678+ NULL);
1679+ mnt = unionfs_lower_mnt_idx(dentry, bindex);
1680+ if (!mnt)
1681+ continue;
1682+ unionfs_mntput(dentry, bindex);
1683+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1684+ }
1685+ }
1686+ /* restore original dentry's offsets */
1687+ dbstart(dentry) = orig_bstart;
1688+ dbend(dentry) = orig_bend;
1689+ ibstart(dentry->d_inode) = orig_bstart;
1690+ ibend(dentry->d_inode) = orig_bend;
1691+
1692+ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1693+ if (unlikely(err))
1694+ err = -EFAULT;
1695+
1696+out:
1697+ return err < 0 ? err : bend;
1698+}
1699+/* ioctl entry: revalidate the file, serve unionfs commands, pass others down */
1700+long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1701+{
1702+ long err;
1703+ struct dentry *dentry = file->f_path.dentry;
1704+ struct dentry *parent;
1705+
1706+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1707+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1708+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1709+
1710+ err = unionfs_file_revalidate(file, parent, true);
1711+ if (unlikely(err))
1712+ goto out;
1713+
1714+ /* handle the unionfs-specific commands here; all others go down */
1715+ switch (cmd) {
1716+ case UNIONFS_IOCTL_INCGEN:
1717+ /* deprecated: only logs a hint and fails with -ENOSYS */
1718+ pr_info("unionfs: incgen ioctl deprecated; "
1719+ "use \"-o remount,incgen\"\n");
1720+ err = -ENOSYS;
1721+ break;
1722+
1723+ case UNIONFS_IOCTL_QUERYFILE:
1724+ /* Return list of branches containing the given file */
1725+ err = unionfs_ioctl_queryfile(file, parent, cmd, arg);
1726+ break;
1727+
1728+ default:
1729+ /* pass the ioctl down */
1730+ err = do_ioctl(file, cmd, arg);
1731+ break;
1732+ }
1733+
1734+out: /* common exit: drop the locks in reverse order of acquisition */
1735+ unionfs_check_file(file);
1736+ unionfs_unlock_dentry(dentry);
1737+ unionfs_unlock_parent(dentry, parent);
1738+ unionfs_read_unlock(dentry->d_sb);
1739+ return err;
1740+}
1741+/* call ->flush on each lower file in fbstart..fbend; stop at the first error */
1742+int unionfs_flush(struct file *file, fl_owner_t id)
1743+{
1744+ int err = 0;
1745+ struct file *lower_file = NULL;
1746+ struct dentry *dentry = file->f_path.dentry;
1747+ struct dentry *parent;
1748+ int bindex, bstart, bend;
1749+
1750+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1751+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1752+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1753+
1754+ err = unionfs_file_revalidate(file, parent,
1755+ UNIONFS_F(file)->wrote_to_file);
1756+ if (unlikely(err))
1757+ goto out;
1758+ unionfs_check_file(file);
1759+
1760+ bstart = fbstart(file);
1761+ bend = fbend(file);
1762+ for (bindex = bstart; bindex <= bend; bindex++) {
1763+ lower_file = unionfs_lower_file_idx(file, bindex);
1764+
1765+ if (lower_file && lower_file->f_op &&
1766+ lower_file->f_op->flush) { /* skip empty slots and files without ->flush */
1767+ err = lower_file->f_op->flush(lower_file, id);
1768+ if (err)
1769+ goto out;
1770+ }
1771+
1772+ }
1773+
1774+out: /* the sanity check is skipped on error; locks are always dropped */
1775+ if (!err)
1776+ unionfs_check_file(file);
1777+ unionfs_unlock_dentry(dentry);
1778+ unionfs_unlock_parent(dentry, parent);
1779+ unionfs_read_unlock(dentry->d_sb);
1780+ return err;
1781+}
1782diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1783new file mode 100644
1784index 0000000..37c2654
1785--- /dev/null
1786+++ b/fs/unionfs/copyup.c
1787@@ -0,0 +1,896 @@
1788+/*
1789+ * Copyright (c) 2003-2011 Erez Zadok
1790+ * Copyright (c) 2003-2006 Charles P. Wright
1791+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1792+ * Copyright (c) 2005-2006 Junjiro Okajima
1793+ * Copyright (c) 2005 Arun M. Krishnakumar
1794+ * Copyright (c) 2004-2006 David P. Quigley
1795+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1796+ * Copyright (c) 2003 Puja Gupta
1797+ * Copyright (c) 2003 Harikesavan Krishnan
1798+ * Copyright (c) 2003-2011 Stony Brook University
1799+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
1800+ *
1801+ * This program is free software; you can redistribute it and/or modify
1802+ * it under the terms of the GNU General Public License version 2 as
1803+ * published by the Free Software Foundation.
1804+ */
1805+
1806+#include "union.h"
1807+
1808+/*
1809+ * For detailed explanation of copyup see:
1810+ * Documentation/filesystems/unionfs/concepts.txt
1811+ */
1812+
1813+#ifdef CONFIG_UNION_FS_XATTR
1814+/* copyup all extended attrs for a given dentry; returns 0 or -errno */
1815+static int copyup_xattrs(struct dentry *old_lower_dentry,
1816+ struct dentry *new_lower_dentry)
1817+{
1818+ int err = 0;
1819+ ssize_t list_size = -1;
1820+ char *name_list = NULL;
1821+ char *attr_value = NULL;
1822+ char *name_list_buf = NULL;
1823+
1824+ /* query the actual size of the xattr list */
1825+ list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1826+ if (list_size <= 0) {
1827+ err = list_size;
1828+ goto out;
1829+ }
1830+
1831+ /* allocate space for the actual list, plus one terminating byte */
1832+ name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1833+ if (unlikely(!name_list || IS_ERR(name_list))) {
1834+ err = name_list ? PTR_ERR(name_list) : -ENOMEM;
1835+ goto out;
1836+ }
1837+ name_list_buf = name_list; /* save for kfree at end */
1838+
1839+ /* now get the actual xattr list of the source file */
1840+ list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1841+ if (list_size <= 0) {
1842+ err = list_size;
1843+ goto out;
1844+ }
1845+ name_list[list_size] = '\0'; /* the walk below stops at an empty name */
1846+
1847+ /* allocate space to hold each xattr's value */
1848+ attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1849+ if (unlikely(!attr_value || IS_ERR(attr_value))) {
1850+ err = attr_value ? PTR_ERR(attr_value) : -ENOMEM;
1851+ attr_value = NULL; /* never hand an ERR_PTR to the free at out: */
1852+ goto out;
1853+ }
1854+ /* in a loop, get and set each xattr from src to dst file */
1855+ while (*name_list) {
1856+ ssize_t size;
1857+
1858+ /* Lock here since vfs_getxattr doesn't lock for us */
1859+ mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1860+ size = vfs_getxattr(old_lower_dentry, name_list,
1861+ attr_value, XATTR_SIZE_MAX);
1862+ mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1863+ if (size < 0) {
1864+ err = size;
1865+ goto out;
1866+ }
1867+ if (size > XATTR_SIZE_MAX) {
1868+ err = -E2BIG;
1869+ goto out;
1870+ }
1871+ /* Don't lock here since vfs_setxattr does it for us. */
1872+ err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1873+ size, 0);
1874+ /*
1875+ * Selinux depends on "security.*" xattrs, so copied-up files
1876+ * must carry them too: on -EPERM, retry the set with
1877+ * CAP_FOWNER raised in a temporary set of credentials.
1878+ * XXX: move entire copyup code to SIOQ.
1879+ */
1880+ if (err == -EPERM && !capable(CAP_FOWNER)) {
1881+ const struct cred *old_creds;
1882+ struct cred *new_creds;
1883+
1884+ new_creds = prepare_creds();
1885+ if (unlikely(!new_creds)) {
1886+ err = -ENOMEM;
1887+ goto out;
1888+ }
1889+ cap_raise(new_creds->cap_effective, CAP_FOWNER);
1890+ old_creds = override_creds(new_creds);
1891+ err = vfs_setxattr(new_lower_dentry, name_list,
1892+ attr_value, size, 0);
1893+ revert_creds(old_creds);
1894+ put_cred(new_creds); /* drop the prepare_creds() reference */
1895+ }
1896+ if (err < 0)
1897+ goto out;
1898+ name_list += strlen(name_list) + 1;
1899+ }
1900+out:
1901+ unionfs_xattr_kfree(name_list_buf);
1902+ unionfs_xattr_kfree(attr_value);
1903+ /* Ignore if xattr isn't supported */
1904+ if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1905+ err = 0;
1906+ return err;
1907+}
1908+#endif /* CONFIG_UNION_FS_XATTR */
1909+
1910+/*
1911+ * Copy times, owner/group and then mode from the old lower inode to the new
1912+ * lower dentry, under the new inode's i_mutex. Returns 0 or an error code.
1913+ * Handle file systems which may not support certain options. For example
1914+ * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1915+ * errors, rather than propagating them up, which results in copyup errors
1916+ * and errors returned back to users.
1917+ */
1918+static int copyup_permissions(struct super_block *sb,
1919+ struct dentry *old_lower_dentry,
1920+ struct dentry *new_lower_dentry)
1921+{
1922+ struct inode *i = old_lower_dentry->d_inode;
1923+ struct iattr newattrs;
1924+ int err;
1925+
1926+ newattrs.ia_atime = i->i_atime;
1927+ newattrs.ia_mtime = i->i_mtime;
1928+ newattrs.ia_ctime = i->i_ctime;
1929+ newattrs.ia_gid = i->i_gid;
1930+ newattrs.ia_uid = i->i_uid;
1931+ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1932+ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1933+ ATTR_GID | ATTR_UID;
1934+ mutex_lock(&new_lower_dentry->d_inode->i_mutex);
1935+ err = notify_change(new_lower_dentry, &newattrs); /* first pass: times and ownership */
1936+ if (err)
1937+ goto out;
1938+
1939+ /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1940+ newattrs.ia_mode = i->i_mode;
1941+ newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1942+ err = notify_change(new_lower_dentry, &newattrs);
1943+ if (err == -EOPNOTSUPP &&
1944+ S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1945+ printk(KERN_WARNING
1946+ "unionfs: changing \"%s\" symlink mode unsupported\n",
1947+ new_lower_dentry->d_name.name);
1948+ err = 0;
1949+ }
1950+
1951+out: /* the mutex taken above is released on every path */
1952+ mutex_unlock(&new_lower_dentry->d_inode->i_mutex);
1953+ return err;
1954+}
1955+
1956+/*
1957+ * create the new device/file/directory/symlink in the new branch, picking
1958+ * the operation from the old inode's type and running it through run_sioq
1959+ *
1960+ * if the object being copied up is a regular file, the file is only created,
1961+ * the contents have to be copied up separately
1962+ */
1963+static int __copyup_ndentry(struct dentry *old_lower_dentry,
1964+ struct dentry *new_lower_dentry,
1965+ struct dentry *new_lower_parent_dentry,
1966+ char *symbuf)
1967+{
1968+ int err = 0;
1969+ umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1970+ struct sioq_args args;
1971+
1972+ if (S_ISDIR(old_mode)) {
1973+ args.mkdir.parent = new_lower_parent_dentry->d_inode;
1974+ args.mkdir.dentry = new_lower_dentry;
1975+ args.mkdir.mode = old_mode;
1976+
1977+ run_sioq(__unionfs_mkdir, &args);
1978+ err = args.err;
1979+ } else if (S_ISLNK(old_mode)) {
1980+ args.symlink.parent = new_lower_parent_dentry->d_inode;
1981+ args.symlink.dentry = new_lower_dentry;
1982+ args.symlink.symbuf = symbuf; /* link target supplied by the caller */
1983+
1984+ run_sioq(__unionfs_symlink, &args);
1985+ err = args.err;
1986+ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1987+ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1988+ args.mknod.parent = new_lower_parent_dentry->d_inode;
1989+ args.mknod.dentry = new_lower_dentry;
1990+ args.mknod.mode = old_mode;
1991+ args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1992+
1993+ run_sioq(__unionfs_mknod, &args);
1994+ err = args.err;
1995+ } else if (S_ISREG(old_mode)) {
1996+ struct nameidata nd;
1997+ err = init_lower_nd(&nd, LOOKUP_CREATE);
1998+ if (unlikely(err < 0))
1999+ goto out;
2000+ args.create.nd = &nd;
2001+ args.create.parent = new_lower_parent_dentry->d_inode;
2002+ args.create.dentry = new_lower_dentry;
2003+ args.create.mode = old_mode;
2004+
2005+ run_sioq(__unionfs_create, &args);
2006+ err = args.err;
2007+ release_lower_nd(&nd, err);
2008+ } else { /* none of the inode types handled above: treated as fatal */
2009+ printk(KERN_CRIT "unionfs: unknown inode type %d\n",
2010+ old_mode);
2011+ BUG();
2012+ }
2013+
2014+out:
2015+ return err;
2016+}
2017+
2018+/* copy up to @len bytes of file data into the new branch; 0 or -errno */
2019+static int __copyup_reg_data(struct dentry *dentry,
2020+ struct dentry *new_lower_dentry, int new_bindex,
2021+ struct dentry *old_lower_dentry, int old_bindex,
2022+ struct file **copyup_file, loff_t len)
2023+{
2024+ struct super_block *sb = dentry->d_sb;
2025+ struct file *input_file, *output_file;
2026+ struct vfsmount *output_mnt;
2027+ mm_segment_t old_fs;
2028+ char *buf = NULL;
2029+ ssize_t read_bytes, write_bytes;
2030+ loff_t size;
2031+ int err = 0;
2032+
2033+ /* open old file */
2034+ unionfs_mntget(dentry, old_bindex);
2035+ branchget(sb, old_bindex);
2036+ /* dentry_open calls dput and mntput if it returns an error */
2037+ input_file = dentry_open(old_lower_dentry,
2038+ unionfs_lower_mnt_idx(dentry, old_bindex),
2039+ O_RDONLY | O_LARGEFILE, current_cred());
2040+ if (IS_ERR(input_file)) {
2041+ dput(old_lower_dentry); /* NOTE(review): on top of dentry_open's own dput - confirm */
2042+ err = PTR_ERR(input_file);
2043+ goto out;
2044+ }
2045+ if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
2046+ err = -EINVAL;
2047+ goto out_close_in;
2048+ }
2049+
2050+ /* open new file */
2051+ dget(new_lower_dentry);
2052+ output_mnt = unionfs_mntget(sb->s_root, new_bindex);
2053+ branchget(sb, new_bindex);
2054+ output_file = dentry_open(new_lower_dentry, output_mnt,
2055+ O_RDWR | O_LARGEFILE, current_cred());
2056+ if (IS_ERR(output_file)) {
2057+ err = PTR_ERR(output_file);
2058+ goto out_close_in2;
2059+ }
2060+ if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
2061+ err = -EINVAL;
2062+ goto out_close_out;
2063+ }
2064+
2065+ /* allocating a buffer */
2066+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2067+ if (unlikely(!buf)) {
2068+ err = -ENOMEM;
2069+ goto out_close_out;
2070+ }
2071+
2072+ input_file->f_pos = 0;
2073+ output_file->f_pos = 0;
2074+
2075+ old_fs = get_fs();
2076+ set_fs(KERNEL_DS);
2077+
2078+ size = len;
2079+ err = 0;
2080+ do {
2081+ if (len >= PAGE_SIZE)
2082+ size = PAGE_SIZE;
2083+ else if ((len < PAGE_SIZE) && (len > 0))
2084+ size = len;
2085+
2086+ len -= PAGE_SIZE;
2087+
2088+ read_bytes =
2089+ input_file->f_op->read(input_file,
2090+ (char __user *)buf, size,
2091+ &input_file->f_pos);
2092+ if (read_bytes <= 0) {
2093+ err = read_bytes;
2094+ break;
2095+ }
2096+
2097+ /* see Documentation/filesystems/unionfs/issues.txt */
2098+ lockdep_off();
2099+ write_bytes =
2100+ output_file->f_op->write(output_file,
2101+ (char __user *)buf,
2102+ read_bytes,
2103+ &output_file->f_pos);
2104+ lockdep_on();
2105+ if ((write_bytes < 0) || (write_bytes < read_bytes)) {
2106+ err = (write_bytes < 0) ? write_bytes : -EIO; /* short write */
2107+ break;
2108+ }
2109+ } while ((read_bytes > 0) && (len > 0));
2110+
2111+ set_fs(old_fs);
2112+
2113+ kfree(buf);
2114+
2115+ if (!err && output_file->f_op->fsync) /* not every fs provides ->fsync */
2116+ err = output_file->f_op->fsync(output_file, 0);
2117+
2118+ if (err)
2119+ goto out_close_out;
2120+
2121+ if (copyup_file) { /* caller takes over the open output file */
2122+ *copyup_file = output_file;
2123+ goto out_close_in;
2124+ }
2125+
2126+out_close_out:
2127+ fput(output_file);
2128+
2129+out_close_in2:
2130+ branchput(sb, new_bindex);
2131+
2132+out_close_in:
2133+ fput(input_file);
2134+
2135+out:
2136+ branchput(sb, old_bindex);
2137+
2138+ return err;
2139+}
2140+
2141+/*
2142+ * clear the lower dentry pointer at new_bindex, restore the saved
2143+ * bstart/bend, and dput the lower references for new and old dentry
2144+ */
2145+static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
2146+ int old_bstart, int old_bend,
2147+ struct dentry *new_lower_dentry, int new_bindex)
2148+{
2149+ /* get rid of the lower dentry and all its traces */
2150+ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
2151+ dbstart(dentry) = old_bstart;
2152+ dbend(dentry) = old_bend;
2153+
2154+ dput(new_lower_dentry);
2155+ dput(old_lower_dentry); /* one visible caller passes NULL here */
2156+}
2157+
2158+/*
2159+ * Copy up a dentry to a file of specified name; returns 0 or an error code.
2160+ *
2161+ * @dir: used to pull the ->i_sb to access other branches
2162+ * @dentry: the non-negative dentry whose lower_inode we should copy
2163+ * @bstart: the branch of the lower_inode to copy from
2164+ * @new_bindex: the branch to create the new file in
2165+ * @name: the name of the file to create
2166+ * @namelen: length of @name (not referenced in this function's body)
2167+ * @copyup_file: the "struct file" to return (optional; regular files only)
2168+ * @len: how many bytes to copy-up?
2169+ */
2170+int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
2171+ int new_bindex, const char *name, int namelen,
2172+ struct file **copyup_file, loff_t len)
2173+{
2174+ struct dentry *new_lower_dentry;
2175+ struct dentry *old_lower_dentry = NULL;
2176+ struct super_block *sb;
2177+ int err = 0;
2178+ int old_bindex;
2179+ int old_bstart;
2180+ int old_bend;
2181+ struct dentry *new_lower_parent_dentry = NULL;
2182+ mm_segment_t oldfs;
2183+ char *symbuf = NULL;
2184+
2185+ verify_locked(dentry);
2186+
2187+ old_bindex = bstart;
2188+ old_bstart = dbstart(dentry);
2189+ old_bend = dbend(dentry);
2190+
2191+ BUG_ON(new_bindex < 0);
2192+ BUG_ON(new_bindex >= old_bindex);
2193+
2194+ sb = dir->i_sb;
2195+
2196+ err = is_robranch_super(sb, new_bindex);
2197+ if (err)
2198+ goto out;
2199+
2200+ /* Create the directory structure above this dentry. */
2201+ new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
2202+ if (IS_ERR(new_lower_dentry)) {
2203+ err = PTR_ERR(new_lower_dentry);
2204+ goto out;
2205+ }
2206+
2207+ old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
2208+ /* we conditionally dput this old_lower_dentry at end of function */
2209+ dget(old_lower_dentry);
2210+
2211+ /* For symlinks, we must read the link before we lock the directory. */
2212+ if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
2213+
2214+ symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
2215+ if (unlikely(!symbuf)) {
2216+ __clear(dentry, old_lower_dentry,
2217+ old_bstart, old_bend,
2218+ new_lower_dentry, new_bindex);
2219+ err = -ENOMEM;
2220+ goto out_free;
2221+ }
2222+
2223+ oldfs = get_fs();
2224+ set_fs(KERNEL_DS);
2225+ err = old_lower_dentry->d_inode->i_op->readlink(
2226+ old_lower_dentry,
2227+ (char __user *)symbuf,
2228+ PATH_MAX);
2229+ set_fs(oldfs);
2230+ if (err < 0) {
2231+ __clear(dentry, old_lower_dentry,
2232+ old_bstart, old_bend,
2233+ new_lower_dentry, new_bindex);
2234+ goto out_free;
2235+ }
2236+ symbuf[err] = '\0';
2237+ }
2238+
2239+ /* Now we lock the parent, and create the object in the new branch. */
2240+ new_lower_parent_dentry = lock_parent(new_lower_dentry);
2241+
2242+ /* create the new inode */
2243+ err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2244+ new_lower_parent_dentry, symbuf);
2245+
2246+ if (err) {
2247+ __clear(dentry, old_lower_dentry,
2248+ old_bstart, old_bend,
2249+ new_lower_dentry, new_bindex);
2250+ goto out_unlock;
2251+ }
2252+
2253+ /* We actually copyup the file here. */
2254+ if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2255+ err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2256+ old_lower_dentry, old_bindex,
2257+ copyup_file, len);
2258+ if (err)
2259+ goto out_unlink;
2260+
2261+ /* Set permissions. */
2262+ err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2263+ if (err)
2264+ goto out_unlink;
2265+
2266+#ifdef CONFIG_UNION_FS_XATTR
2267+ /* Selinux uses extended attributes for permissions. */
2268+ err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2269+ if (err)
2270+ goto out_unlink;
2271+#endif /* CONFIG_UNION_FS_XATTR */
2272+
2273+ /* do not allow files getting deleted to be re-interposed */
2274+ if (!d_deleted(dentry))
2275+ unionfs_reinterpose(dentry);
2276+
2277+ goto out_unlock;
2278+
2279+out_unlink:
2280+ /*
2281+ * copyup failed, because we possibly ran out of space or
2282+ * quota, or something else happened so let's unlink; we don't
2283+ * really care about the return value of vfs_unlink
2284+ */
2285+ vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2286+
2287+ if (copyup_file && *copyup_file) {
2288+ /* a file was handed back before the failure: close it */
2289+ fput(*copyup_file);
2290+ branchput(sb, new_bindex);
2291+ *copyup_file = NULL; /* don't leave a stale pointer with the caller */
2292+ }
2293+
2294+ /*
2295+ * TODO: should we reset the error to something like -EIO?
2296+ *
2297+ * If we don't reset, the user may get some nonsensical errors, but
2298+ * on the other hand, if we reset to EIO, we guarantee that the user
2299+ * will get a "confusing" error message.
2300+ */
2301+
2302+out_unlock:
2303+ unlock_dir(new_lower_parent_dentry);
2304+
2305+out_free:
2306+ /*
2307+ * If old_lower_dentry was not a file, then we need to dput it. If
2308+ * it was a file, then it was already dput indirectly by other
2309+ * functions we call above which operate on regular files.
2310+ */
2311+ if (old_lower_dentry && old_lower_dentry->d_inode &&
2312+ !S_ISREG(old_lower_dentry->d_inode->i_mode))
2313+ dput(old_lower_dentry);
2314+ kfree(symbuf);
2315+
2316+ if (err) {
2317+ /*
2318+ * if directory creation succeeded, but inode copyup failed,
2319+ * then purge new dentries.
2320+ */
2321+ if (dbstart(dentry) < old_bstart &&
2322+ ibstart(dentry->d_inode) > dbstart(dentry))
2323+ __clear(dentry, NULL, old_bstart, old_bend,
2324+ unionfs_lower_dentry(dentry), dbstart(dentry));
2325+ goto out;
2326+ }
2327+ if (!S_ISDIR(dentry->d_inode->i_mode)) {
2328+ unionfs_postcopyup_release(dentry);
2329+ if (!unionfs_lower_inode(dentry->d_inode)) {
2330+ /*
2331+ * If we got here, then we copied up to an
2332+ * unlinked-open file, whose name is .unionfsXXXXX.
2333+ */
2334+ struct inode *inode = new_lower_dentry->d_inode;
2335+ atomic_inc(&inode->i_count);
2336+ unionfs_set_lower_inode_idx(dentry->d_inode,
2337+ ibstart(dentry->d_inode),
2338+ inode);
2339+ }
2340+ }
2341+ unionfs_postcopyup_setmnt(dentry);
2342+ /* sync inode times from copied-up inode to our inode */
2343+ unionfs_copy_attr_times(dentry->d_inode);
2344+ unionfs_check_inode(dir);
2345+ unionfs_check_dentry(dentry);
2346+out:
2347+ return err;
2348+}
2349+
2350+/*
2351+ * This function creates a copy of a file represented by 'file' which
2352+ * currently resides in branch 'bstart' to branch 'new_bindex'. The copy
2353+ * will be named "name". Returns 0 or the error from copyup_dentry().
2354+ */
2355+int copyup_named_file(struct inode *dir, struct file *file, char *name,
2356+ int bstart, int new_bindex, loff_t len)
2357+{
2358+ int err = 0;
2359+ struct file *output_file = NULL;
2360+
2361+ err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2362+ name, strlen(name), &output_file, len);
2363+ if (!err) { /* success: point the file at the new branch's lower file */
2364+ fbstart(file) = new_bindex;
2365+ unionfs_set_lower_file_idx(file, new_bindex, output_file);
2366+ }
2367+
2368+ return err;
2369+}
2370+
2371+/*
2372+ * Create a copy of the file represented by 'file', which currently resides
2373+ * in branch 'bstart', in branch 'new_bindex' under the dentry's own name.
2374+ */
2375+int copyup_file(struct inode *dir, struct file *file, int bstart,
2376+ int new_bindex, loff_t len)
2377+{
2378+ int err = 0;
2379+ struct file *output_file = NULL;
2380+ struct dentry *dentry = file->f_path.dentry;
2381+
2382+ err = copyup_dentry(dir, dentry, bstart, new_bindex,
2383+ dentry->d_name.name, dentry->d_name.len,
2384+ &output_file, len);
2385+ if (!err) { /* success: point the file at the new branch's lower file */
2386+ fbstart(file) = new_bindex;
2387+ unionfs_set_lower_file_idx(file, new_bindex, output_file);
2388+ }
2389+
2390+ return err;
2391+}
2392+
2393+/* purge a dentry's negative lower-branch states and recompute bstart/bend */
2394+static void __cleanup_dentry(struct dentry *dentry, int bindex,
2395+ int old_bstart, int old_bend)
2396+{
2397+ int loop_start;
2398+ int loop_end;
2399+ int new_bstart = -1;
2400+ int new_bend = -1;
2401+ int i;
2402+
2403+ loop_start = min(old_bstart, bindex);
2404+ loop_end = max(old_bend, bindex);
2405+
2406+ /*
2407+ * This loop sets the bstart and bend for the new dentry by
2408+ * traversing from left to right. It also dputs all negative
2409+ * dentries except bindex
2410+ */
2411+ for (i = loop_start; i <= loop_end; i++) {
2412+ if (!unionfs_lower_dentry_idx(dentry, i))
2413+ continue;
2414+
2415+ if (i == bindex) { /* always kept, whether positive or negative */
2416+ new_bend = i;
2417+ if (new_bstart < 0)
2418+ new_bstart = i;
2419+ continue;
2420+ }
2421+
2422+ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2423+ dput(unionfs_lower_dentry_idx(dentry, i));
2424+ unionfs_set_lower_dentry_idx(dentry, i, NULL);
2425+
2426+ unionfs_mntput(dentry, i);
2427+ unionfs_set_lower_mnt_idx(dentry, i, NULL);
2428+ } else {
2429+ if (new_bstart < 0)
2430+ new_bstart = i;
2431+ new_bend = i;
2432+ }
2433+ }
2434+
2435+ if (new_bstart < 0) /* nothing survived: fall back to bindex */
2436+ new_bstart = bindex;
2437+ if (new_bend < 0)
2438+ new_bend = bindex;
2439+ dbstart(dentry) = new_bstart;
2440+ dbend(dentry) = new_bend;
2441+
2442+}
2443+
2444+/* set lower inode ptr (via igrab) and widen ibstart/ibend to cover bindex */
2445+static void __set_inode(struct dentry *upper, struct dentry *lower,
2446+ int bindex)
2447+{
2448+ unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2449+ igrab(lower->d_inode));
2450+ if (likely(ibstart(upper->d_inode) > bindex))
2451+ ibstart(upper->d_inode) = bindex;
2452+ if (likely(ibend(upper->d_inode) < bindex))
2453+ ibend(upper->d_inode) = bindex;
2454+
2455+}
2456+
2457+/* set lower dentry ptr and widen dbstart/dbend to cover bindex */
2458+static void __set_dentry(struct dentry *upper, struct dentry *lower,
2459+ int bindex)
2460+{
2461+ unionfs_set_lower_dentry_idx(upper, bindex, lower);
2462+ if (likely(dbstart(upper) > bindex))
2463+ dbstart(upper) = bindex;
2464+ if (likely(dbend(upper) < bindex))
2465+ dbend(upper) = bindex;
2466+}
2467+
2468+/*
2469+ * This function replicates the directory structure up to the given dentry
2470+ * in the bindex branch. Returns the lower dentry for @name, or an ERR_PTR.
2471+ */
2472+struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2473+ const char *name, int bindex)
2474+{
2475+ int err;
2476+ struct dentry *child_dentry;
2477+ struct dentry *parent_dentry;
2478+ struct dentry *lower_parent_dentry = NULL;
2479+ struct dentry *lower_dentry = NULL;
2480+ const char *childname;
2481+ unsigned int childnamelen;
2482+ int nr_dentry;
2483+ int count = 0;
2484+ int old_bstart;
2485+ int old_bend;
2486+ struct dentry **path = NULL;
2487+ struct super_block *sb;
2488+
2489+ verify_locked(dentry);
2490+
2491+ err = is_robranch_super(dir->i_sb, bindex);
2492+ if (err) {
2493+ lower_dentry = ERR_PTR(err);
2494+ goto out;
2495+ }
2496+
2497+ old_bstart = dbstart(dentry);
2498+ old_bend = dbend(dentry);
2499+
2500+ lower_dentry = ERR_PTR(-ENOMEM); /* result if the allocation below fails */
2501+
2502+ /* There is no sense allocating any less than the minimum. */
2503+ nr_dentry = 1;
2504+ path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2505+ if (unlikely(!path))
2506+ goto out;
2507+
2508+ /* assume the negative dentry of unionfs as the parent dentry */
2509+ parent_dentry = dentry;
2510+
2511+ /*
2512+ * This loop finds the first parent that exists in the given branch.
2513+ * We start building the directory structure from there. At the end
2514+ * of the loop, the following should hold:
2515+ * - child_dentry is the first nonexistent child
2516+ * - parent_dentry is the first existent parent
2517+ * - path[0] is the deepest child
2518+ * - path[count] is the first child to create
2519+ */
2520+ do {
2521+ child_dentry = parent_dentry;
2522+
2523+ /* find the parent directory dentry in unionfs */
2524+ parent_dentry = dget_parent(child_dentry);
2525+
2526+ /* find out the lower_parent_dentry in the given branch */
2527+ lower_parent_dentry =
2528+ unionfs_lower_dentry_idx(parent_dentry, bindex);
2529+
2530+ /* grow path table (doubling its capacity) */
2531+ if (count == nr_dentry) {
2532+ void *p;
2533+
2534+ nr_dentry *= 2;
2535+ p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2536+ GFP_KERNEL);
2537+ if (unlikely(!p)) {
2538+ lower_dentry = ERR_PTR(-ENOMEM);
2539+ goto out;
2540+ }
2541+ path = p;
2542+ }
2543+
2544+ /* store the child dentry */
2545+ path[count++] = child_dentry;
2546+ } while (!lower_parent_dentry);
2547+ count--;
2548+
2549+ sb = dentry->d_sb;
2550+
2551+ /*
2552+ * This code goes between the begin/end labels and basically
2553+ * emulates a while(child_dentry != dentry), only cleaner and
2554+ * shorter than what would be a much longer while loop.
2555+ */
2556+begin:
2557+ /* get lower parent dir in the current branch */
2558+ lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2559+ dput(parent_dentry);
2560+
2561+ /* init the values to lookup */
2562+ childname = child_dentry->d_name.name;
2563+ childnamelen = child_dentry->d_name.len;
2564+
2565+ if (child_dentry != dentry) {
2566+ /* lookup child in the underlying file system */
2567+ lower_dentry = lookup_lck_len(childname, lower_parent_dentry,
2568+ childnamelen);
2569+ if (IS_ERR(lower_dentry))
2570+ goto out;
2571+ } else {
2572+ /*
2573+ * Is the name a whiteout of the child name ? lookup the
2574+ * whiteout child in the underlying file system
2575+ */
2576+ lower_dentry = lookup_lck_len(name, lower_parent_dentry,
2577+ strlen(name));
2578+ if (IS_ERR(lower_dentry))
2579+ goto out;
2580+
2581+ /* Replace the current dentry (if any) with the new one */
2582+ dput(unionfs_lower_dentry_idx(dentry, bindex));
2583+ unionfs_set_lower_dentry_idx(dentry, bindex,
2584+ lower_dentry);
2585+
2586+ __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2587+ goto out; /* reached the target dentry: done */
2588+ }
2589+
2590+ if (lower_dentry->d_inode) {
2591+ /*
2592+ * since this already exists we dput to avoid
2593+ * multiple references on the same dentry
2594+ */
2595+ dput(lower_dentry);
2596+ } else {
2597+ struct sioq_args args;
2598+
2599+ /* it's a negative dentry, create a new dir */
2600+ lower_parent_dentry = lock_parent(lower_dentry);
2601+
2602+ args.mkdir.parent = lower_parent_dentry->d_inode;
2603+ args.mkdir.dentry = lower_dentry;
2604+ args.mkdir.mode = child_dentry->d_inode->i_mode;
2605+
2606+ run_sioq(__unionfs_mkdir, &args);
2607+ err = args.err;
2608+
2609+ if (!err)
2610+ err = copyup_permissions(dir->i_sb, child_dentry,
2611+ lower_dentry);
2612+ unlock_dir(lower_parent_dentry);
2613+ if (err) {
2614+ dput(lower_dentry);
2615+ lower_dentry = ERR_PTR(err);
2616+ goto out;
2617+ }
2618+
2619+ }
2620+
2621+ __set_inode(child_dentry, lower_dentry, bindex);
2622+ __set_dentry(child_dentry, lower_dentry, bindex);
2623+ /*
2624+ * update times of this dentry, but also the parent, because if
2625+ * we changed, the parent may have changed too.
2626+ */
2627+ fsstack_copy_attr_times(parent_dentry->d_inode,
2628+ lower_parent_dentry->d_inode);
2629+ unionfs_copy_attr_times(child_dentry->d_inode);
2630+
2631+ parent_dentry = child_dentry; /* descend one level and repeat */
2632+ child_dentry = path[--count];
2633+ goto begin;
2634+out:
2635+ /* on error, dput the path[] entries left over from the loop above */
2636+ if (IS_ERR(lower_dentry))
2637+ while (count)
2638+ dput(path[count--]);
2639+ kfree(path);
2640+ return lower_dentry;
2641+}
2642+
2643+/*
2644+ * Post-copyup helper to ensure we have valid mnts: set lower mnt of
2645+ * dentry+parents (at dbstart) to that of the first parent that has an mnt.
2646+ */
2647+void unionfs_postcopyup_setmnt(struct dentry *dentry)
2648+{
2649+ struct dentry *parent, *hasone;
2650+ int bindex = dbstart(dentry);
2651+
2652+ if (unionfs_lower_mnt_idx(dentry, bindex)) /* already set: nothing to do */
2653+ return;
2654+ hasone = dentry->d_parent;
2655+ /* this loop should stop at root dentry */
2656+ while (!unionfs_lower_mnt_idx(hasone, bindex))
2657+ hasone = hasone->d_parent;
2658+ parent = dentry;
2659+ while (!unionfs_lower_mnt_idx(parent, bindex)) { /* fill dentry and each mnt-less ancestor */
2660+ unionfs_set_lower_mnt_idx(parent, bindex,
2661+ unionfs_mntget(hasone, bindex));
2662+ parent = parent->d_parent;
2663+ }
2664+}
2665+
2666+/*
2667+ * Post-copyup helper to release all non-directory source objects of a
2668+ * copied-up file, leaving only the lower object at dbstart.
2669+ */
2670+void unionfs_postcopyup_release(struct dentry *dentry)
2671+{
2672+ int bstart, bend;
2673+
2674+ BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2675+ bstart = dbstart(dentry);
2676+ bend = dbend(dentry);
2677+
2678+ path_put_lowers(dentry, bstart + 1, bend, false); /* everything after bstart */
2679+ iput_lowers(dentry->d_inode, bstart + 1, bend, false);
2680+
2681+ dbend(dentry) = bstart; /* collapse both ranges to the single branch bstart */
2682+ ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bstart;
2683+}
2684diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2685new file mode 100644
2686index 0000000..6092e69
2687--- /dev/null
2688+++ b/fs/unionfs/debug.c
2689@@ -0,0 +1,548 @@
2690+/*
2691+ * Copyright (c) 2003-2011 Erez Zadok
2692+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2693+ * Copyright (c) 2003-2011 Stony Brook University
2694+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2695+ *
2696+ * This program is free software; you can redistribute it and/or modify
2697+ * it under the terms of the GNU General Public License version 2 as
2698+ * published by the Free Software Foundation.
2699+ */
2700+
2701+#include "union.h"
2702+
2703+/*
2704+ * Helper debugging functions for maintainers (and for users to report
2705+ * useful information back to maintainers)
2706+ */
2707+
/*
 * Print the calling file/function/line once per checker invocation.
 * The expansion reads and sets a variable named printed_caller, which
 * must exist as an int in the scope where the macro is used.
 */
#define PRINT_CALLER(fname, fxn, line)					\
	do {								\
		if (printed_caller)					\
			break;						\
		pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line));	\
		printed_caller = 1;					\
	} while (0)
2716+
2717+/*
2718+ * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2719+ * the fan-out of various Unionfs objects. We check that no lower objects
2720+ * exist outside the start/end branch range; that all objects within are
2721+ * non-NULL (with some allowed exceptions); that for every lower file
2722+ * there's a lower dentry+inode; that the start/end ranges match for all
2723+ * corresponding lower objects; that open files/symlinks have only one lower
2724+ * objects, but directories can have several; and more.
2725+ */
2726+void __unionfs_check_inode(const struct inode *inode,
2727+ const char *fname, const char *fxn, int line)
2728+{
2729+ int bindex;
2730+ int istart, iend;
2731+ struct inode *lower_inode;
2732+ struct super_block *sb;
2733+ int printed_caller = 0;
2734+ void *poison_ptr;
2735+
2736+ /* for inodes now */
2737+ BUG_ON(!inode);
2738+ sb = inode->i_sb;
2739+ istart = ibstart(inode);
2740+ iend = ibend(inode);
2741+ /* don't check inode if no lower branches */
2742+ if (istart < 0 && iend < 0)
2743+ return;
2744+ if (unlikely(istart > iend)) {
2745+ PRINT_CALLER(fname, fxn, line);
2746+ pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2747+ inode, istart, iend);
2748+ }
2749+ if (unlikely((istart == -1 && iend != -1) ||
2750+ (istart != -1 && iend == -1))) {
2751+ PRINT_CALLER(fname, fxn, line);
2752+ pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2753+ inode, istart, iend);
2754+ }
2755+ if (!S_ISDIR(inode->i_mode)) {
2756+ if (unlikely(iend != istart)) {
2757+ PRINT_CALLER(fname, fxn, line);
2758+ pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2759+ inode, istart, iend);
2760+ }
2761+ }
2762+
2763+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2764+ if (unlikely(!UNIONFS_I(inode))) {
2765+ PRINT_CALLER(fname, fxn, line);
2766+ pr_debug(" Ci3: no inode_info %p\n", inode);
2767+ return;
2768+ }
2769+ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2770+ PRINT_CALLER(fname, fxn, line);
2771+ pr_debug(" Ci4: no lower_inodes %p\n", inode);
2772+ return;
2773+ }
2774+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2775+ if (lower_inode) {
2776+ memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2777+ if (unlikely(bindex < istart || bindex > iend)) {
2778+ PRINT_CALLER(fname, fxn, line);
2779+ pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2780+ "istart/end=%d:%d\n", inode,
2781+ lower_inode, bindex, istart, iend);
2782+ } else if (unlikely(lower_inode == poison_ptr)) {
2783+ /* freed inode! */
2784+ PRINT_CALLER(fname, fxn, line);
2785+ pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2786+ "istart/end=%d:%d\n", inode,
2787+ lower_inode, bindex, istart, iend);
2788+ }
2789+ continue;
2790+ }
2791+ /* if we get here, then lower_inode == NULL */
2792+ if (bindex < istart || bindex > iend)
2793+ continue;
2794+ /*
2795+ * directories can have NULL lower inodes in b/t start/end,
2796+ * but NOT if at the start/end range.
2797+ */
2798+ if (unlikely(S_ISDIR(inode->i_mode) &&
2799+ bindex > istart && bindex < iend))
2800+ continue;
2801+ PRINT_CALLER(fname, fxn, line);
2802+ pr_debug(" Ci7: inode/linode=%p:%p "
2803+ "bindex=%d istart/end=%d:%d\n",
2804+ inode, lower_inode, bindex, istart, iend);
2805+ }
2806+}
2807+
2808+void __unionfs_check_dentry(const struct dentry *dentry,
2809+ const char *fname, const char *fxn, int line)
2810+{
2811+ int bindex;
2812+ int dstart, dend, istart, iend;
2813+ struct dentry *lower_dentry;
2814+ struct inode *inode, *lower_inode;
2815+ struct super_block *sb;
2816+ struct vfsmount *lower_mnt;
2817+ int printed_caller = 0;
2818+ void *poison_ptr;
2819+
2820+ BUG_ON(!dentry);
2821+ sb = dentry->d_sb;
2822+ inode = dentry->d_inode;
2823+ dstart = dbstart(dentry);
2824+ dend = dbend(dentry);
2825+ /* don't check dentry/mnt if no lower branches */
2826+ if (dstart < 0 && dend < 0)
2827+ goto check_inode;
2828+ BUG_ON(dstart > dend);
2829+
2830+ if (unlikely((dstart == -1 && dend != -1) ||
2831+ (dstart != -1 && dend == -1))) {
2832+ PRINT_CALLER(fname, fxn, line);
2833+ pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2834+ dentry, dstart, dend);
2835+ }
2836+ /*
2837+ * check for NULL dentries inside the start/end range, or
2838+ * non-NULL dentries outside the start/end range.
2839+ */
2840+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2841+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2842+ if (lower_dentry) {
2843+ if (unlikely(bindex < dstart || bindex > dend)) {
2844+ PRINT_CALLER(fname, fxn, line);
2845+ pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2846+ "bindex=%d dstart/end=%d:%d\n",
2847+ dentry, lower_dentry,
2848+ (lower_dentry ? lower_dentry->d_inode :
2849+ (void *) -1L),
2850+ bindex, dstart, dend);
2851+ }
2852+ } else { /* lower_dentry == NULL */
2853+ if (bindex < dstart || bindex > dend)
2854+ continue;
2855+ /*
2856+ * Directories can have NULL lower inodes in b/t
2857+ * start/end, but NOT if at the start/end range.
2858+ * Ignore this rule, however, if this is a NULL
2859+ * dentry or a deleted dentry.
2860+ */
2861+ if (unlikely(!d_deleted((struct dentry *) dentry) &&
2862+ inode &&
2863+ !(inode && S_ISDIR(inode->i_mode) &&
2864+ bindex > dstart && bindex < dend))) {
2865+ PRINT_CALLER(fname, fxn, line);
2866+ pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2867+ "bindex=%d dstart/end=%d:%d\n",
2868+ dentry, lower_dentry,
2869+ (lower_dentry ?
2870+ lower_dentry->d_inode :
2871+ (void *) -1L),
2872+ bindex, dstart, dend);
2873+ }
2874+ }
2875+ }
2876+
2877+ /* check for vfsmounts same as for dentries */
2878+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2879+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2880+ if (lower_mnt) {
2881+ if (unlikely(bindex < dstart || bindex > dend)) {
2882+ PRINT_CALLER(fname, fxn, line);
2883+ pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2884+ "dstart/end=%d:%d\n", dentry,
2885+ lower_mnt, bindex, dstart, dend);
2886+ }
2887+ } else { /* lower_mnt == NULL */
2888+ if (bindex < dstart || bindex > dend)
2889+ continue;
2890+ /*
2891+ * Directories can have NULL lower inodes in b/t
2892+ * start/end, but NOT if at the start/end range.
2893+ * Ignore this rule, however, if this is a NULL
2894+ * dentry.
2895+ */
2896+ if (unlikely(inode &&
2897+ !(inode && S_ISDIR(inode->i_mode) &&
2898+ bindex > dstart && bindex < dend))) {
2899+ PRINT_CALLER(fname, fxn, line);
2900+ pr_debug(" CM1: dentry/lmnt=%p:%p "
2901+ "bindex=%d dstart/end=%d:%d\n",
2902+ dentry, lower_mnt, bindex,
2903+ dstart, dend);
2904+ }
2905+ }
2906+ }
2907+
2908+check_inode:
2909+ /* for inodes now */
2910+ if (!inode)
2911+ return;
2912+ istart = ibstart(inode);
2913+ iend = ibend(inode);
2914+ /* don't check inode if no lower branches */
2915+ if (istart < 0 && iend < 0)
2916+ return;
2917+ BUG_ON(istart > iend);
2918+ if (unlikely((istart == -1 && iend != -1) ||
2919+ (istart != -1 && iend == -1))) {
2920+ PRINT_CALLER(fname, fxn, line);
2921+ pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2922+ dentry, inode, istart, iend);
2923+ }
2924+ if (unlikely(istart != dstart)) {
2925+ PRINT_CALLER(fname, fxn, line);
2926+ pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2927+ dentry, inode, istart, dstart);
2928+ }
2929+ if (unlikely(iend != dend)) {
2930+ PRINT_CALLER(fname, fxn, line);
2931+ pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2932+ dentry, inode, iend, dend);
2933+ }
2934+
2935+ if (!S_ISDIR(inode->i_mode)) {
2936+ if (unlikely(dend != dstart)) {
2937+ PRINT_CALLER(fname, fxn, line);
2938+ pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2939+ dentry, inode, dstart, dend);
2940+ }
2941+ if (unlikely(iend != istart)) {
2942+ PRINT_CALLER(fname, fxn, line);
2943+ pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2944+ dentry, inode, istart, iend);
2945+ }
2946+ }
2947+
2948+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2949+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2950+ if (lower_inode) {
2951+ memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2952+ if (unlikely(bindex < istart || bindex > iend)) {
2953+ PRINT_CALLER(fname, fxn, line);
2954+ pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2955+ "istart/end=%d:%d\n", dentry,
2956+ lower_inode, bindex, istart, iend);
2957+ } else if (unlikely(lower_inode == poison_ptr)) {
2958+ /* freed inode! */
2959+ PRINT_CALLER(fname, fxn, line);
2960+ pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2961+ "istart/end=%d:%d\n", dentry,
2962+ lower_inode, bindex, istart, iend);
2963+ }
2964+ continue;
2965+ }
2966+ /* if we get here, then lower_inode == NULL */
2967+ if (bindex < istart || bindex > iend)
2968+ continue;
2969+ /*
2970+ * directories can have NULL lower inodes in b/t start/end,
2971+ * but NOT if at the start/end range.
2972+ */
2973+ if (unlikely(S_ISDIR(inode->i_mode) &&
2974+ bindex > istart && bindex < iend))
2975+ continue;
2976+ PRINT_CALLER(fname, fxn, line);
2977+ pr_debug(" CI7: dentry/linode=%p:%p "
2978+ "bindex=%d istart/end=%d:%d\n",
2979+ dentry, lower_inode, bindex, istart, iend);
2980+ }
2981+
2982+ /*
2983+ * If it's a directory, then intermediate objects b/t start/end can
2984+ * be NULL. But, check that all three are NULL: lower dentry, mnt,
2985+ * and inode.
2986+ */
2987+ if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2988+ for (bindex = dstart+1; bindex < dend; bindex++) {
2989+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2990+ lower_dentry = unionfs_lower_dentry_idx(dentry,
2991+ bindex);
2992+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2993+ if (unlikely(!((lower_inode && lower_dentry &&
2994+ lower_mnt) ||
2995+ (!lower_inode &&
2996+ !lower_dentry && !lower_mnt)))) {
2997+ PRINT_CALLER(fname, fxn, line);
2998+ pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2999+ "bindex=%d dstart/end=%d:%d\n",
3000+ lower_mnt, lower_dentry, lower_inode,
3001+ bindex, dstart, dend);
3002+ }
3003+ }
3004+ /* check if lower inode is newer than upper one (it shouldn't) */
3005+ if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) {
3006+ PRINT_CALLER(fname, fxn, line);
3007+ for (bindex = ibstart(inode); bindex <= ibend(inode);
3008+ bindex++) {
3009+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3010+ if (unlikely(!lower_inode))
3011+ continue;
3012+ pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
3013+ "ctime/lctime=%lu.%lu/%lu.%lu\n",
3014+ bindex,
3015+ inode->i_mtime.tv_sec,
3016+ inode->i_mtime.tv_nsec,
3017+ lower_inode->i_mtime.tv_sec,
3018+ lower_inode->i_mtime.tv_nsec,
3019+ inode->i_ctime.tv_sec,
3020+ inode->i_ctime.tv_nsec,
3021+ lower_inode->i_ctime.tv_sec,
3022+ lower_inode->i_ctime.tv_nsec);
3023+ }
3024+ }
3025+}
3026+
3027+void __unionfs_check_file(const struct file *file,
3028+ const char *fname, const char *fxn, int line)
3029+{
3030+ int bindex;
3031+ int dstart, dend, fstart, fend;
3032+ struct dentry *dentry;
3033+ struct file *lower_file;
3034+ struct inode *inode;
3035+ struct super_block *sb;
3036+ int printed_caller = 0;
3037+
3038+ BUG_ON(!file);
3039+ dentry = file->f_path.dentry;
3040+ sb = dentry->d_sb;
3041+ dstart = dbstart(dentry);
3042+ dend = dbend(dentry);
3043+ BUG_ON(dstart > dend);
3044+ fstart = fbstart(file);
3045+ fend = fbend(file);
3046+ BUG_ON(fstart > fend);
3047+
3048+ if (unlikely((fstart == -1 && fend != -1) ||
3049+ (fstart != -1 && fend == -1))) {
3050+ PRINT_CALLER(fname, fxn, line);
3051+ pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
3052+ file, dentry, fstart, fend);
3053+ }
3054+ if (unlikely(fstart != dstart)) {
3055+ PRINT_CALLER(fname, fxn, line);
3056+ pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
3057+ file, dentry, fstart, dstart);
3058+ }
3059+ if (unlikely(fend != dend)) {
3060+ PRINT_CALLER(fname, fxn, line);
3061+ pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
3062+ file, dentry, fend, dend);
3063+ }
3064+ inode = dentry->d_inode;
3065+ if (!S_ISDIR(inode->i_mode)) {
3066+ if (unlikely(fend != fstart)) {
3067+ PRINT_CALLER(fname, fxn, line);
3068+ pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
3069+ file, inode, fstart, fend);
3070+ }
3071+ if (unlikely(dend != dstart)) {
3072+ PRINT_CALLER(fname, fxn, line);
3073+ pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
3074+ file, dentry, dstart, dend);
3075+ }
3076+ }
3077+
3078+ /*
3079+ * check for NULL dentries inside the start/end range, or
3080+ * non-NULL dentries outside the start/end range.
3081+ */
3082+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
3083+ lower_file = unionfs_lower_file_idx(file, bindex);
3084+ if (lower_file) {
3085+ if (unlikely(bindex < fstart || bindex > fend)) {
3086+ PRINT_CALLER(fname, fxn, line);
3087+ pr_debug(" CF5: file/lower=%p:%p bindex=%d "
3088+ "fstart/end=%d:%d\n", file,
3089+ lower_file, bindex, fstart, fend);
3090+ }
3091+ } else { /* lower_file == NULL */
3092+ if (bindex >= fstart && bindex <= fend) {
3093+ /*
3094+ * directories can have NULL lower inodes in
3095+ * b/t start/end, but NOT if at the
3096+ * start/end range.
3097+ */
3098+ if (unlikely(!(S_ISDIR(inode->i_mode) &&
3099+ bindex > fstart &&
3100+ bindex < fend))) {
3101+ PRINT_CALLER(fname, fxn, line);
3102+ pr_debug(" CF6: file/lower=%p:%p "
3103+ "bindex=%d fstart/end=%d:%d\n",
3104+ file, lower_file, bindex,
3105+ fstart, fend);
3106+ }
3107+ }
3108+ }
3109+ }
3110+
3111+ __unionfs_check_dentry(dentry, fname, fxn, line);
3112+}
3113+
3114+void __unionfs_check_nd(const struct nameidata *nd,
3115+ const char *fname, const char *fxn, int line)
3116+{
3117+ struct file *file;
3118+ int printed_caller = 0;
3119+
3120+ if (unlikely(!nd))
3121+ return;
3122+ if (nd->flags & LOOKUP_OPEN) {
3123+ file = nd->intent.open.file;
3124+ if (unlikely(file->f_path.dentry &&
3125+ strcmp(file->f_path.dentry->d_sb->s_type->name,
3126+ UNIONFS_NAME))) {
3127+ PRINT_CALLER(fname, fxn, line);
3128+ pr_debug(" CND1: lower_file of type %s\n",
3129+ file->f_path.dentry->d_sb->s_type->name);
3130+ }
3131+ }
3132+}
3133+
3134+static unsigned int __mnt_get_count(struct vfsmount *mnt)
3135+{
3136+#ifdef CONFIG_SMP
3137+ unsigned int count = 0;
3138+ int cpu;
3139+
3140+ for_each_possible_cpu(cpu) {
3141+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
3142+ }
3143+
3144+ return count;
3145+#else
3146+ return mnt->mnt_count;
3147+#endif
3148+}
3149+
3150+/* useful to track vfsmount leaks that could cause EBUSY on unmount */
3151+void __show_branch_counts(const struct super_block *sb,
3152+ const char *file, const char *fxn, int line)
3153+{
3154+ int i;
3155+ struct vfsmount *mnt;
3156+
3157+ pr_debug("BC:");
3158+ for (i = 0; i < sbmax(sb); i++) {
3159+ if (likely(sb->s_root))
3160+ mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
3161+ else
3162+ mnt = NULL;
3163+ printk(KERN_CONT "%d:",
3164+ (mnt ? __mnt_get_count(mnt) : -99));
3165+ }
3166+ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
3167+}
3168+
3169+void __show_inode_times(const struct inode *inode,
3170+ const char *file, const char *fxn, int line)
3171+{
3172+ struct inode *lower_inode;
3173+ int bindex;
3174+
3175+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3176+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3177+ if (unlikely(!lower_inode))
3178+ continue;
3179+ pr_debug("IT(%lu:%d): %s:%s:%d "
3180+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3181+ inode->i_ino, bindex,
3182+ file, fxn, line,
3183+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3184+ lower_inode->i_mtime.tv_sec,
3185+ lower_inode->i_mtime.tv_nsec,
3186+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3187+ lower_inode->i_ctime.tv_sec,
3188+ lower_inode->i_ctime.tv_nsec);
3189+ }
3190+}
3191+
3192+void __show_dinode_times(const struct dentry *dentry,
3193+ const char *file, const char *fxn, int line)
3194+{
3195+ struct inode *inode = dentry->d_inode;
3196+ struct inode *lower_inode;
3197+ int bindex;
3198+
3199+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3200+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3201+ if (!lower_inode)
3202+ continue;
3203+ pr_debug("DT(%s:%lu:%d): %s:%s:%d "
3204+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3205+ dentry->d_name.name, inode->i_ino, bindex,
3206+ file, fxn, line,
3207+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3208+ lower_inode->i_mtime.tv_sec,
3209+ lower_inode->i_mtime.tv_nsec,
3210+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3211+ lower_inode->i_ctime.tv_sec,
3212+ lower_inode->i_ctime.tv_nsec);
3213+ }
3214+}
3215+
3216+void __show_inode_counts(const struct inode *inode,
3217+ const char *file, const char *fxn, int line)
3218+{
3219+ struct inode *lower_inode;
3220+ int bindex;
3221+
3222+ if (unlikely(!inode)) {
3223+ pr_debug("SiC: Null inode\n");
3224+ return;
3225+ }
3226+ for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb);
3227+ bindex++) {
3228+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3229+ if (unlikely(!lower_inode))
3230+ continue;
3231+ pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
3232+ inode->i_ino, bindex,
3233+ atomic_read(&(inode)->i_count),
3234+ atomic_read(&(lower_inode)->i_count),
3235+ file, fxn, line);
3236+ }
3237+}
3238diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
3239new file mode 100644
3240index 0000000..c0205a4
3241--- /dev/null
3242+++ b/fs/unionfs/dentry.c
3243@@ -0,0 +1,406 @@
3244+/*
3245+ * Copyright (c) 2003-2011 Erez Zadok
3246+ * Copyright (c) 2003-2006 Charles P. Wright
3247+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3248+ * Copyright (c) 2005-2006 Junjiro Okajima
3249+ * Copyright (c) 2005 Arun M. Krishnakumar
3250+ * Copyright (c) 2004-2006 David P. Quigley
3251+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3252+ * Copyright (c) 2003 Puja Gupta
3253+ * Copyright (c) 2003 Harikesavan Krishnan
3254+ * Copyright (c) 2003-2011 Stony Brook University
3255+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
3256+ *
3257+ * This program is free software; you can redistribute it and/or modify
3258+ * it under the terms of the GNU General Public License version 2 as
3259+ * published by the Free Software Foundation.
3260+ */
3261+
3262+#include "union.h"
3263+
3264+bool is_negative_lower(const struct dentry *dentry)
3265+{
3266+ int bindex;
3267+ struct dentry *lower_dentry;
3268+
3269+ BUG_ON(!dentry);
3270+ /* cache coherency: check if file was deleted on lower branch */
3271+ if (dbstart(dentry) < 0)
3272+ return true;
3273+ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3274+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3275+ /* unhashed (i.e., unlinked) lower dentries don't count */
3276+ if (lower_dentry && lower_dentry->d_inode &&
3277+ !d_deleted(lower_dentry) &&
3278+ !(lower_dentry->d_flags & DCACHE_NFSFS_RENAMED))
3279+ return false;
3280+ }
3281+ return true;
3282+}
3283+
3284+static inline void __dput_lowers(struct dentry *dentry, int start, int end)
3285+{
3286+ struct dentry *lower_dentry;
3287+ int bindex;
3288+
3289+ if (start < 0)
3290+ return;
3291+ for (bindex = start; bindex <= end; bindex++) {
3292+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3293+ if (!lower_dentry)
3294+ continue;
3295+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3296+ dput(lower_dentry);
3297+ }
3298+}
3299+
3300+/*
3301+ * Purge and invalidate as many data pages of a unionfs inode. This is
3302+ * called when the lower inode has changed, and we want to force processes
3303+ * to re-get the new data.
3304+ */
3305+static inline void purge_inode_data(struct inode *inode)
3306+{
3307+ /* remove all non-private mappings */
3308+ unmap_mapping_range(inode->i_mapping, 0, 0, 0);
3309+ /* invalidate as many pages as possible */
3310+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
3311+ /*
3312+ * Don't try to truncate_inode_pages here, because this could lead
3313+ * to a deadlock between some of address_space ops and dentry
3314+ * revalidation: the address space op is invoked with a lock on our
3315+ * own page, and truncate_inode_pages will block on locked pages.
3316+ */
3317+}
3318+
3319+/*
3320+ * Revalidate a single file/symlink/special dentry. Assume that info nodes
3321+ * of the @dentry and its @parent are locked. Assume parent is valid,
3322+ * otherwise return false (and let's hope the VFS will try to re-lookup this
3323+ * dentry). Returns true if valid, false otherwise.
3324+ */
3325+bool __unionfs_d_revalidate(struct dentry *dentry, struct dentry *parent,
3326+ bool willwrite)
3327+{
3328+ bool valid = true; /* default is valid */
3329+ struct dentry *lower_dentry;
3330+ struct dentry *result;
3331+ int bindex, bstart, bend;
3332+ int sbgen, dgen, pdgen;
3333+ int positive = 0;
3334+ int interpose_flag;
3335+
3336+ verify_locked(dentry);
3337+ verify_locked(parent);
3338+
3339+ /* if the dentry is unhashed, do NOT revalidate */
3340+ if (d_deleted(dentry))
3341+ goto out;
3342+
3343+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3344+
3345+ if (is_newer_lower(dentry)) {
3346+ /* root dentry is always valid */
3347+ if (IS_ROOT(dentry)) {
3348+ unionfs_copy_attr_times(dentry->d_inode);
3349+ } else {
3350+ /*
3351+ * reset generation number to zero, guaranteed to be
3352+ * "old"
3353+ */
3354+ dgen = 0;
3355+ atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3356+ }
3357+ if (!willwrite)
3358+ purge_inode_data(dentry->d_inode);
3359+ }
3360+
3361+ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3362+
3363+ BUG_ON(dbstart(dentry) == -1);
3364+ if (dentry->d_inode)
3365+ positive = 1;
3366+
3367+ /* if our dentry is valid, then validate all lower ones */
3368+ if (sbgen == dgen)
3369+ goto validate_lowers;
3370+
3371+ /* The root entry should always be valid */
3372+ BUG_ON(IS_ROOT(dentry));
3373+
3374+ /* We can't work correctly if our parent isn't valid. */
3375+ pdgen = atomic_read(&UNIONFS_D(parent)->generation);
3376+
3377+ /* Free the pointers for our inodes and this dentry. */
3378+ path_put_lowers_all(dentry, false);
3379+
3380+ interpose_flag = INTERPOSE_REVAL_NEG;
3381+ if (positive) {
3382+ interpose_flag = INTERPOSE_REVAL;
3383+ iput_lowers_all(dentry->d_inode, true);
3384+ }
3385+
3386+ if (realloc_dentry_private_data(dentry) != 0) {
3387+ valid = false;
3388+ goto out;
3389+ }
3390+
3391+ result = unionfs_lookup_full(dentry, parent, interpose_flag);
3392+ if (result) {
3393+ if (IS_ERR(result)) {
3394+ valid = false;
3395+ goto out;
3396+ }
3397+ /*
3398+ * current unionfs_lookup_backend() doesn't return
3399+ * a valid dentry
3400+ */
3401+ dput(dentry);
3402+ dentry = result;
3403+ }
3404+
3405+ if (unlikely(positive && is_negative_lower(dentry))) {
3406+ /* call make_bad_inode here ? */
3407+ d_drop(dentry);
3408+ valid = false;
3409+ goto out;
3410+ }
3411+
3412+ /*
3413+ * if we got here then we have revalidated our dentry and all lower
3414+ * ones, so we can return safely.
3415+ */
3416+ if (!valid) /* lower dentry revalidation failed */
3417+ goto out;
3418+
3419+ /*
3420+ * If the parent's gen no. matches the superblock's gen no., then
3421+ * we can update our denty's gen no. If they didn't match, then it
3422+ * was OK to revalidate this dentry with a stale parent, but we'll
3423+ * purposely not update our dentry's gen no. (so it can be redone);
3424+ * and, we'll mark our parent dentry as invalid so it'll force it
3425+ * (and our dentry) to be revalidated.
3426+ */
3427+ if (pdgen == sbgen)
3428+ atomic_set(&UNIONFS_D(dentry)->generation, sbgen);
3429+ goto out;
3430+
3431+validate_lowers:
3432+
3433+ /* The revalidation must occur across all branches */
3434+ bstart = dbstart(dentry);
3435+ bend = dbend(dentry);
3436+ BUG_ON(bstart == -1);
3437+ for (bindex = bstart; bindex <= bend; bindex++) {
3438+ int err;
3439+ struct nameidata lower_nd;
3440+
3441+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3442+ if (!lower_dentry || !lower_dentry->d_op
3443+ || !lower_dentry->d_op->d_revalidate)
3444+ continue;
3445+ /*
3446+ * Don't pass nameidata to lower file system, because we
3447+ * don't want an arbitrary lower file being opened or
3448+ * returned to us: it may be useless to us because of the
3449+ * fanout nature of unionfs (cf. file/directory open-file
3450+ * invariants). We will open lower files as and when needed
3451+ * later on.
3452+ */
3453+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
3454+ if (unlikely(err < 0)) {
3455+ valid = false;
3456+ break;
3457+ }
3458+ if (!lower_dentry->d_op->d_revalidate(lower_dentry, &lower_nd))
3459+ valid = false;
3460+ release_lower_nd(&lower_nd, err);
3461+ }
3462+
3463+ if (!dentry->d_inode ||
3464+ ibstart(dentry->d_inode) < 0 ||
3465+ ibend(dentry->d_inode) < 0) {
3466+ valid = false;
3467+ goto out;
3468+ }
3469+
3470+ if (valid) {
3471+ /*
3472+ * If we get here, and we copy the meta-data from the lower
3473+ * inode to our inode, then it is vital that we have already
3474+ * purged all unionfs-level file data. We do that in the
3475+ * caller (__unionfs_d_revalidate) by calling
3476+ * purge_inode_data.
3477+ */
3478+ unionfs_copy_attr_all(dentry->d_inode,
3479+ unionfs_lower_inode(dentry->d_inode));
3480+ fsstack_copy_inode_size(dentry->d_inode,
3481+ unionfs_lower_inode(dentry->d_inode));
3482+ }
3483+
3484+out:
3485+ return valid;
3486+}
3487+
3488+/*
3489+ * Determine if the lower inode objects have changed from below the unionfs
3490+ * inode. Return true if changed, false otherwise.
3491+ *
3492+ * We check if the mtime or ctime have changed. However, the inode times
3493+ * can be changed by anyone without much protection, including
3494+ * asynchronously. This can sometimes cause unionfs to find that the lower
3495+ * file system doesn't change its inode times quick enough, resulting in a
3496+ * false positive indication (which is harmless, it just makes unionfs do
3497+ * extra work in re-validating the objects). To minimize the chances of
3498+ * these situations, we still consider such small time changes valid, but we
3499+ * don't print debugging messages unless the time changes are greater than
3500+ * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
3501+ * because significant changes are more likely due to users manually
3502+ * touching lower files.
3503+ */
3504+bool is_newer_lower(const struct dentry *dentry)
3505+{
3506+ int bindex;
3507+ struct inode *inode;
3508+ struct inode *lower_inode;
3509+
3510+ /* ignore if we're called on semi-initialized dentries/inodes */
3511+ if (!dentry || !UNIONFS_D(dentry))
3512+ return false;
3513+ inode = dentry->d_inode;
3514+ if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3515+ ibstart(inode) < 0 || ibend(inode) < 0)
3516+ return false;
3517+
3518+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3519+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3520+ if (!lower_inode)
3521+ continue;
3522+
3523+ /* check if mtime/ctime have changed */
3524+ if (unlikely(timespec_compare(&inode->i_mtime,
3525+ &lower_inode->i_mtime) < 0)) {
3526+ if ((lower_inode->i_mtime.tv_sec -
3527+ inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3528+ pr_info("unionfs: new lower inode mtime "
3529+ "(bindex=%d, name=%s)\n", bindex,
3530+ dentry->d_name.name);
3531+ show_dinode_times(dentry);
3532+ }
3533+ return true;
3534+ }
3535+ if (unlikely(timespec_compare(&inode->i_ctime,
3536+ &lower_inode->i_ctime) < 0)) {
3537+ if ((lower_inode->i_ctime.tv_sec -
3538+ inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3539+ pr_info("unionfs: new lower inode ctime "
3540+ "(bindex=%d, name=%s)\n", bindex,
3541+ dentry->d_name.name);
3542+ show_dinode_times(dentry);
3543+ }
3544+ return true;
3545+ }
3546+ }
3547+
3548+ /*
3549+ * Last check: if this is a positive dentry, but somehow all lower
3550+ * dentries are negative or unhashed, then this dentry needs to be
3551+ * revalidated, because someone probably deleted the objects from
3552+ * the lower branches directly.
3553+ */
3554+ if (is_negative_lower(dentry))
3555+ return true;
3556+
3557+ return false; /* default: lower is not newer */
3558+}
3559+
3560+static int unionfs_d_revalidate(struct dentry *dentry,
3561+ struct nameidata *nd_unused)
3562+{
3563+ bool valid = true;
3564+ int err = 1; /* 1 means valid for the VFS */
3565+ struct dentry *parent;
3566+
3567+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3568+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3569+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3570+
3571+ valid = __unionfs_d_revalidate(dentry, parent, false);
3572+ if (valid) {
3573+ unionfs_postcopyup_setmnt(dentry);
3574+ unionfs_check_dentry(dentry);
3575+ } else {
3576+ d_drop(dentry);
3577+ err = valid;
3578+ }
3579+ unionfs_unlock_dentry(dentry);
3580+ unionfs_unlock_parent(dentry, parent);
3581+ unionfs_read_unlock(dentry->d_sb);
3582+
3583+ return err;
3584+}
3585+
3586+static void unionfs_d_release(struct dentry *dentry)
3587+{
3588+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3589+ if (unlikely(!UNIONFS_D(dentry)))
3590+ goto out; /* skip if no lower branches */
3591+ /* must lock our branch configuration here */
3592+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3593+
3594+ unionfs_check_dentry(dentry);
3595+ /* this could be a negative dentry, so check first */
3596+ if (dbstart(dentry) < 0) {
3597+ unionfs_unlock_dentry(dentry);
3598+ goto out; /* due to a (normal) failed lookup */
3599+ }
3600+
3601+ /* Release all the lower dentries */
3602+ path_put_lowers_all(dentry, true);
3603+
3604+ unionfs_unlock_dentry(dentry);
3605+
3606+out:
3607+ free_dentry_private_data(dentry);
3608+ unionfs_read_unlock(dentry->d_sb);
3609+ return;
3610+}
3611+
3612+/*
3613+ * Called when we're removing the last reference to our dentry. So we
3614+ * should drop all lower references too.
3615+ */
3616+static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3617+{
3618+ int rc;
3619+
3620+ BUG_ON(!dentry);
3621+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3622+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3623+
3624+ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3625+ goto drop_lower_inodes;
3626+ path_put_lowers_all(dentry, false);
3627+
3628+drop_lower_inodes:
3629+ rc = atomic_read(&inode->i_count);
3630+ if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3631+ /* see Documentation/filesystems/unionfs/issues.txt */
3632+ lockdep_off();
3633+ iput(unionfs_lower_inode(inode));
3634+ lockdep_on();
3635+ unionfs_set_lower_inode(inode, NULL);
3636+ /* XXX: may need to set start/end to -1? */
3637+ }
3638+
3639+ iput(inode);
3640+
3641+ unionfs_unlock_dentry(dentry);
3642+ unionfs_read_unlock(dentry->d_sb);
3643+}
3644+
3645+struct dentry_operations unionfs_dops = {
3646+ .d_revalidate = unionfs_d_revalidate,
3647+ .d_release = unionfs_d_release,
3648+ .d_iput = unionfs_d_iput,
3649+};
3650diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3651new file mode 100644
3652index 0000000..72a9c1a
3653--- /dev/null
3654+++ b/fs/unionfs/dirfops.c
3655@@ -0,0 +1,302 @@
3656+/*
3657+ * Copyright (c) 2003-2011 Erez Zadok
3658+ * Copyright (c) 2003-2006 Charles P. Wright
3659+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3660+ * Copyright (c) 2005-2006 Junjiro Okajima
3661+ * Copyright (c) 2005 Arun M. Krishnakumar
3662+ * Copyright (c) 2004-2006 David P. Quigley
3663+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3664+ * Copyright (c) 2003 Puja Gupta
3665+ * Copyright (c) 2003 Harikesavan Krishnan
3666+ * Copyright (c) 2003-2011 Stony Brook University
3667+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
3668+ *
3669+ * This program is free software; you can redistribute it and/or modify
3670+ * it under the terms of the GNU General Public License version 2 as
3671+ * published by the Free Software Foundation.
3672+ */
3673+
3674+#include "union.h"
3675+
3676+/* Make sure our rdstate is playing by the rules. */
3677+static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3678+{
3679+ BUG_ON(rdstate->offset >= DIREOF);
3680+ BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3681+}
3682+
3683+struct unionfs_getdents_callback {
3684+ struct unionfs_dir_state *rdstate;
3685+ void *dirent;
3686+ int entries_written;
3687+ int filldir_called;
3688+ int filldir_error;
3689+ filldir_t filldir;
3690+ struct super_block *sb;
3691+};
3692+
3693+/* based on generic filldir in fs/readir.c */
3694+static int unionfs_filldir(void *dirent, const char *oname, int namelen,
3695+ loff_t offset, u64 ino, unsigned int d_type)
3696+{
3697+ struct unionfs_getdents_callback *buf = dirent;
3698+ struct filldir_node *found = NULL;
3699+ int err = 0;
3700+ int is_whiteout;
3701+ char *name = (char *) oname;
3702+
3703+ buf->filldir_called++;
3704+
3705+ is_whiteout = is_whiteout_name(&name, &namelen);
3706+
3707+ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3708+
3709+ if (found) {
3710+ /*
3711+ * If we had non-whiteout entry in dir cache, then mark it
3712+ * as a whiteout and but leave it in the dir cache.
3713+ */
3714+ if (is_whiteout && !found->whiteout)
3715+ found->whiteout = is_whiteout;
3716+ goto out;
3717+ }
3718+
3719+ /* if 'name' isn't a whiteout, filldir it. */
3720+ if (!is_whiteout) {
3721+ off_t pos = rdstate2offset(buf->rdstate);
3722+ u64 unionfs_ino = ino;
3723+
3724+ err = buf->filldir(buf->dirent, name, namelen, pos,
3725+ unionfs_ino, d_type);
3726+ buf->rdstate->offset++;
3727+ verify_rdstate_offset(buf->rdstate);
3728+ }
3729+ /*
3730+ * If we did fill it, stuff it in our hash, otherwise return an
3731+ * error.
3732+ */
3733+ if (err) {
3734+ buf->filldir_error = err;
3735+ goto out;
3736+ }
3737+ buf->entries_written++;
3738+ err = add_filldir_node(buf->rdstate, name, namelen,
3739+ buf->rdstate->bindex, is_whiteout);
3740+ if (err)
3741+ buf->filldir_error = err;
3742+
3743+out:
3744+ return err;
3745+}
3746+
3747+static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3748+{
3749+ int err = 0;
3750+ struct file *lower_file = NULL;
3751+ struct dentry *dentry = file->f_path.dentry;
3752+ struct dentry *parent;
3753+ struct inode *inode = NULL;
3754+ struct unionfs_getdents_callback buf;
3755+ struct unionfs_dir_state *uds;
3756+ int bend;
3757+ loff_t offset;
3758+
3759+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3760+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3761+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3762+
3763+ err = unionfs_file_revalidate(file, parent, false);
3764+ if (unlikely(err))
3765+ goto out;
3766+
3767+ inode = dentry->d_inode;
3768+
3769+ uds = UNIONFS_F(file)->rdstate;
3770+ if (!uds) {
3771+ if (file->f_pos == DIREOF) {
3772+ goto out;
3773+ } else if (file->f_pos > 0) {
3774+ uds = find_rdstate(inode, file->f_pos);
3775+ if (unlikely(!uds)) {
3776+ err = -ESTALE;
3777+ goto out;
3778+ }
3779+ UNIONFS_F(file)->rdstate = uds;
3780+ } else {
3781+ init_rdstate(file);
3782+ uds = UNIONFS_F(file)->rdstate;
3783+ }
3784+ }
3785+ bend = fbend(file);
3786+
3787+ while (uds->bindex <= bend) {
3788+ lower_file = unionfs_lower_file_idx(file, uds->bindex);
3789+ if (!lower_file) {
3790+ uds->bindex++;
3791+ uds->dirpos = 0;
3792+ continue;
3793+ }
3794+
3795+ /* prepare callback buffer */
3796+ buf.filldir_called = 0;
3797+ buf.filldir_error = 0;
3798+ buf.entries_written = 0;
3799+ buf.dirent = dirent;
3800+ buf.filldir = filldir;
3801+ buf.rdstate = uds;
3802+ buf.sb = inode->i_sb;
3803+
3804+ /* Read starting from where we last left off. */
3805+ offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3806+ if (offset < 0) {
3807+ err = offset;
3808+ goto out;
3809+ }
3810+ err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3811+
3812+ /* Save the position for when we continue. */
3813+ offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3814+ if (offset < 0) {
3815+ err = offset;
3816+ goto out;
3817+ }
3818+ uds->dirpos = offset;
3819+
3820+ /* Copy the atime. */
3821+ fsstack_copy_attr_atime(inode,
3822+ lower_file->f_path.dentry->d_inode);
3823+
3824+ if (err < 0)
3825+ goto out;
3826+
3827+ if (buf.filldir_error)
3828+ break;
3829+
3830+ if (!buf.entries_written) {
3831+ uds->bindex++;
3832+ uds->dirpos = 0;
3833+ }
3834+ }
3835+
3836+ if (!buf.filldir_error && uds->bindex >= bend) {
3837+ /* Save the number of hash entries for next time. */
3838+ UNIONFS_I(inode)->hashsize = uds->hashentries;
3839+ free_rdstate(uds);
3840+ UNIONFS_F(file)->rdstate = NULL;
3841+ file->f_pos = DIREOF;
3842+ } else {
3843+ file->f_pos = rdstate2offset(uds);
3844+ }
3845+
3846+out:
3847+ if (!err)
3848+ unionfs_check_file(file);
3849+ unionfs_unlock_dentry(dentry);
3850+ unionfs_unlock_parent(dentry, parent);
3851+ unionfs_read_unlock(dentry->d_sb);
3852+ return err;
3853+}
3854+
3855+/*
3856+ * This is not meant to be a generic repositioning function. If you do
3857+ * things that aren't supported, then we return EINVAL.
3858+ *
3859+ * What is allowed:
3860+ * (1) seeking to the same position that you are currently at
3861+ * This really has no effect, but returns where you are.
3862+ * (2) seeking to the beginning of the file
3863+ * This throws out all state, and lets you begin again.
3864+ */
3865+static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3866+{
3867+ struct unionfs_dir_state *rdstate;
3868+ struct dentry *dentry = file->f_path.dentry;
3869+ struct dentry *parent;
3870+ loff_t err;
3871+
3872+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3873+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3874+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3875+
3876+ err = unionfs_file_revalidate(file, parent, false);
3877+ if (unlikely(err))
3878+ goto out;
3879+
3880+ rdstate = UNIONFS_F(file)->rdstate;
3881+
3882+ /*
3883+ * we let users seek to their current position, but not anywhere
3884+ * else.
3885+ */
3886+ if (!offset) {
3887+ switch (origin) {
3888+ case SEEK_SET:
3889+ if (rdstate) {
3890+ free_rdstate(rdstate);
3891+ UNIONFS_F(file)->rdstate = NULL;
3892+ }
3893+ init_rdstate(file);
3894+ err = 0;
3895+ break;
3896+ case SEEK_CUR:
3897+ err = file->f_pos;
3898+ break;
3899+ case SEEK_END:
3900+ /* Unsupported, because we would break everything. */
3901+ err = -EINVAL;
3902+ break;
3903+ }
3904+ } else {
3905+ switch (origin) {
3906+ case SEEK_SET:
3907+ if (rdstate) {
3908+ if (offset == rdstate2offset(rdstate))
3909+ err = offset;
3910+ else if (file->f_pos == DIREOF)
3911+ err = DIREOF;
3912+ else
3913+ err = -EINVAL;
3914+ } else {
3915+ struct inode *inode;
3916+ inode = dentry->d_inode;
3917+ rdstate = find_rdstate(inode, offset);
3918+ if (rdstate) {
3919+ UNIONFS_F(file)->rdstate = rdstate;
3920+ err = rdstate->offset;
3921+ } else {
3922+ err = -EINVAL;
3923+ }
3924+ }
3925+ break;
3926+ case SEEK_CUR:
3927+ case SEEK_END:
3928+ /* Unsupported, because we would break everything. */
3929+ err = -EINVAL;
3930+ break;
3931+ }
3932+ }
3933+
3934+out:
3935+ if (!err)
3936+ unionfs_check_file(file);
3937+ unionfs_unlock_dentry(dentry);
3938+ unionfs_unlock_parent(dentry, parent);
3939+ unionfs_read_unlock(dentry->d_sb);
3940+ return err;
3941+}
3942+
3943+/*
3944+ * Trimmed directory options, we shouldn't pass everything down since
3945+ * we don't want to operate on partial directories.
3946+ */
3947+struct file_operations unionfs_dir_fops = {
3948+ .llseek = unionfs_dir_llseek,
3949+ .read = generic_read_dir,
3950+ .readdir = unionfs_readdir,
3951+ .unlocked_ioctl = unionfs_ioctl,
3952+ .open = unionfs_open,
3953+ .release = unionfs_file_release,
3954+ .flush = unionfs_flush,
3955+ .fsync = unionfs_fsync,
3956+ .fasync = unionfs_fasync,
3957+};
3958diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3959new file mode 100644
3960index 0000000..62ec9af
3961--- /dev/null
3962+++ b/fs/unionfs/dirhelper.c
3963@@ -0,0 +1,158 @@
3964+/*
3965+ * Copyright (c) 2003-2011 Erez Zadok
3966+ * Copyright (c) 2003-2006 Charles P. Wright
3967+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3968+ * Copyright (c) 2005-2006 Junjiro Okajima
3969+ * Copyright (c) 2005 Arun M. Krishnakumar
3970+ * Copyright (c) 2004-2006 David P. Quigley
3971+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3972+ * Copyright (c) 2003 Puja Gupta
3973+ * Copyright (c) 2003 Harikesavan Krishnan
3974+ * Copyright (c) 2003-2011 Stony Brook University
3975+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
3976+ *
3977+ * This program is free software; you can redistribute it and/or modify
3978+ * it under the terms of the GNU General Public License version 2 as
3979+ * published by the Free Software Foundation.
3980+ */
3981+
3982+#include "union.h"
3983+
/* Values for the ->mode field of struct unionfs_rdutil_callback. */
#define RD_NONE 0
#define RD_CHECK_EMPTY 1

/* Context handed to readdir_util_callback() by check_empty(). */
struct unionfs_rdutil_callback {
	int err;		/* result of the most recent callback */
	int filldir_called;	/* set to 1 whenever the callback runs */
	struct unionfs_dir_state *rdstate;	/* names seen so far */
	int mode;		/* one of the RD_* values */
};
3993+
3994+/* This filldir function makes sure only whiteouts exist within a directory. */
3995+static int readdir_util_callback(void *dirent, const char *oname, int namelen,
3996+ loff_t offset, u64 ino, unsigned int d_type)
3997+{
3998+ int err = 0;
3999+ struct unionfs_rdutil_callback *buf = dirent;
4000+ int is_whiteout;
4001+ struct filldir_node *found;
4002+ char *name = (char *) oname;
4003+
4004+ buf->filldir_called = 1;
4005+
4006+ if (name[0] == '.' && (namelen == 1 ||
4007+ (name[1] == '.' && namelen == 2)))
4008+ goto out;
4009+
4010+ is_whiteout = is_whiteout_name(&name, &namelen);
4011+
4012+ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
4013+ /* If it was found in the table there was a previous whiteout. */
4014+ if (found)
4015+ goto out;
4016+
4017+ /*
4018+ * if it wasn't found and isn't a whiteout, the directory isn't
4019+ * empty.
4020+ */
4021+ err = -ENOTEMPTY;
4022+ if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout)
4023+ goto out;
4024+
4025+ err = add_filldir_node(buf->rdstate, name, namelen,
4026+ buf->rdstate->bindex, is_whiteout);
4027+
4028+out:
4029+ buf->err = err;
4030+ return err;
4031+}
4032+
4033+/* Is a directory logically empty? */
4034+int check_empty(struct dentry *dentry, struct dentry *parent,
4035+ struct unionfs_dir_state **namelist)
4036+{
4037+ int err = 0;
4038+ struct dentry *lower_dentry = NULL;
4039+ struct vfsmount *mnt;
4040+ struct super_block *sb;
4041+ struct file *lower_file;
4042+ struct unionfs_rdutil_callback *buf = NULL;
4043+ int bindex, bstart, bend, bopaque;
4044+
4045+ sb = dentry->d_sb;
4046+
4047+
4048+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
4049+
4050+ err = unionfs_partial_lookup(dentry, parent);
4051+ if (err)
4052+ goto out;
4053+
4054+ bstart = dbstart(dentry);
4055+ bend = dbend(dentry);
4056+ bopaque = dbopaque(dentry);
4057+ if (0 <= bopaque && bopaque < bend)
4058+ bend = bopaque;
4059+
4060+ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
4061+ if (unlikely(!buf)) {
4062+ err = -ENOMEM;
4063+ goto out;
4064+ }
4065+ buf->err = 0;
4066+ buf->mode = RD_CHECK_EMPTY;
4067+ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
4068+ if (unlikely(!buf->rdstate)) {
4069+ err = -ENOMEM;
4070+ goto out;
4071+ }
4072+
4073+ /* Process the lower directories with rdutil_callback as a filldir. */
4074+ for (bindex = bstart; bindex <= bend; bindex++) {
4075+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4076+ if (!lower_dentry)
4077+ continue;
4078+ if (!lower_dentry->d_inode)
4079+ continue;
4080+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
4081+ continue;
4082+
4083+ dget(lower_dentry);
4084+ mnt = unionfs_mntget(dentry, bindex);
4085+ branchget(sb, bindex);
4086+ lower_file = dentry_open(lower_dentry, mnt, O_RDONLY, current_cred());
4087+ if (IS_ERR(lower_file)) {
4088+ err = PTR_ERR(lower_file);
4089+ branchput(sb, bindex);
4090+ goto out;
4091+ }
4092+
4093+ do {
4094+ buf->filldir_called = 0;
4095+ buf->rdstate->bindex = bindex;
4096+ err = vfs_readdir(lower_file,
4097+ readdir_util_callback, buf);
4098+ if (buf->err)
4099+ err = buf->err;
4100+ } while ((err >= 0) && buf->filldir_called);
4101+
4102+ /* fput calls dput for lower_dentry */
4103+ fput(lower_file);
4104+ branchput(sb, bindex);
4105+
4106+ if (err < 0)
4107+ goto out;
4108+ }
4109+
4110+out:
4111+ if (buf) {
4112+ if (namelist && !err)
4113+ *namelist = buf->rdstate;
4114+ else if (buf->rdstate)
4115+ free_rdstate(buf->rdstate);
4116+ kfree(buf);
4117+ }
4118+
4119+
4120+ return err;
4121+}
4122diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4123new file mode 100644
4124index 0000000..ae1b86a
4125--- /dev/null
4126+++ b/fs/unionfs/fanout.h
4127@@ -0,0 +1,407 @@
4128+/*
4129+ * Copyright (c) 2003-2011 Erez Zadok
4130+ * Copyright (c) 2003-2006 Charles P. Wright
4131+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4132+ * Copyright (c) 2005 Arun M. Krishnakumar
4133+ * Copyright (c) 2004-2006 David P. Quigley
4134+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4135+ * Copyright (c) 2003 Puja Gupta
4136+ * Copyright (c) 2003 Harikesavan Krishnan
4137+ * Copyright (c) 2003-2011 Stony Brook University
4138+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
4139+ *
4140+ * This program is free software; you can redistribute it and/or modify
4141+ * it under the terms of the GNU General Public License version 2 as
4142+ * published by the Free Software Foundation.
4143+ */
4144+
4145+#ifndef _FANOUT_H_
4146+#define _FANOUT_H_
4147+
4148+/*
4149+ * Inode to private data
4150+ *
4151+ * Since we use containers and the struct inode is _inside_ the
4152+ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4153+ * inode pointer), return a valid non-NULL pointer.
4154+ */
4155+static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4156+{
4157+ return container_of(inode, struct unionfs_inode_info, vfs_inode);
4158+}
4159+
4160+#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4161+#define ibend(ino) (UNIONFS_I(ino)->bend)
4162+
4163+/* Dentry to private data (kept in the dentry's d_fsdata field) */
4164+#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata)
4165+#define dbstart(dent) (UNIONFS_D(dent)->bstart)
4166+#define dbend(dent) (UNIONFS_D(dent)->bend)
4167+#define dbopaque(dent) (UNIONFS_D(dent)->bopaque)
4168+
4169+/* Superblock to private data (kept in s_fs_info); sbstart is always 0 */
4170+#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4171+#define sbstart(sb) 0
4172+#define sbend(sb) (UNIONFS_SB(sb)->bend)
4173+#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4174+#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4175+
4176+/* File to private data (kept in the file's private_data field) */
4177+#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4178+#define fbstart(file) (UNIONFS_F(file)->bstart)
4179+#define fbend(file) (UNIONFS_F(file)->bend)
4180+
4181+/* macros to manipulate branch IDs in stored in our superblock */
4182+static inline int branch_id(struct super_block *sb, int index)
4183+{
4184+ BUG_ON(!sb || index < 0);
4185+ return UNIONFS_SB(sb)->data[index].branch_id;
4186+}
4187+
4188+static inline void set_branch_id(struct super_block *sb, int index, int val)
4189+{
4190+ BUG_ON(!sb || index < 0);
4191+ UNIONFS_SB(sb)->data[index].branch_id = val;
4192+}
4193+
4194+static inline void new_branch_id(struct super_block *sb, int index)
4195+{
4196+ BUG_ON(!sb || index < 0);
4197+ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4198+}
4199+
4200+/*
4201+ * Find new index of matching branch with an existing superblock of a known
4202+ * (possibly old) id. This is needed because branches could have been
4203+ * added/deleted causing the branches of any open files to shift.
4204+ *
4205+ * @sb: the new superblock which may have new/different branch IDs
4206+ * @id: the old/existing id we're looking for
4207+ * Returns index of newly found branch (0 or greater), -1 otherwise.
4208+ */
4209+static inline int branch_id_to_idx(struct super_block *sb, int id)
4210+{
4211+ int i;
4212+ for (i = 0; i < sbmax(sb); i++) {
4213+ if (branch_id(sb, i) == id)
4214+ return i;
4215+ }
4216+ /* in the non-ODF code, this should really never happen */
4217+ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4218+ return -1;
4219+}
4220+
4221+/* File to lower file. */
4222+static inline struct file *unionfs_lower_file(const struct file *f)
4223+{
4224+ BUG_ON(!f);
4225+ return UNIONFS_F(f)->lower_files[fbstart(f)];
4226+}
4227+
4228+static inline struct file *unionfs_lower_file_idx(const struct file *f,
4229+ int index)
4230+{
4231+ BUG_ON(!f || index < 0);
4232+ return UNIONFS_F(f)->lower_files[index];
4233+}
4234+
4235+static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4236+ struct file *val)
4237+{
4238+ BUG_ON(!f || index < 0);
4239+ UNIONFS_F(f)->lower_files[index] = val;
4240+ /* save branch ID (may be redundant?) */
4241+ UNIONFS_F(f)->saved_branch_ids[index] =
4242+ branch_id((f)->f_path.dentry->d_sb, index);
4243+}
4244+
/* Store @val as the lower file of @f's starting branch. */
static inline void unionfs_set_lower_file(struct file *f, struct file *val)
{
	int first;

	BUG_ON(!f);
	first = fbstart(f);
	unionfs_set_lower_file_idx(f, first, val);
}
4250+
4251+/* Inode to lower inode. */
4252+static inline struct inode *unionfs_lower_inode(const struct inode *i)
4253+{
4254+ BUG_ON(!i);
4255+ return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4256+}
4257+
4258+static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4259+ int index)
4260+{
4261+ BUG_ON(!i || index < 0);
4262+ return UNIONFS_I(i)->lower_inodes[index];
4263+}
4264+
4265+static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4266+ struct inode *val)
4267+{
4268+ BUG_ON(!i || index < 0);
4269+ UNIONFS_I(i)->lower_inodes[index] = val;
4270+}
4271+
4272+static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4273+{
4274+ BUG_ON(!i);
4275+ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4276+}
4277+
4278+/* Superblock to lower superblock. */
4279+static inline struct super_block *unionfs_lower_super(
4280+ const struct super_block *sb)
4281+{
4282+ BUG_ON(!sb);
4283+ return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4284+}
4285+
4286+static inline struct super_block *unionfs_lower_super_idx(
4287+ const struct super_block *sb,
4288+ int index)
4289+{
4290+ BUG_ON(!sb || index < 0);
4291+ return UNIONFS_SB(sb)->data[index].sb;
4292+}
4293+
4294+static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4295+ int index,
4296+ struct super_block *val)
4297+{
4298+ BUG_ON(!sb || index < 0);
4299+ UNIONFS_SB(sb)->data[index].sb = val;
4300+}
4301+
4302+static inline void unionfs_set_lower_super(struct super_block *sb,
4303+ struct super_block *val)
4304+{
4305+ BUG_ON(!sb);
4306+ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4307+}
4308+
4309+/* Branch count macros. */
4310+static inline int branch_count(const struct super_block *sb, int index)
4311+{
4312+ BUG_ON(!sb || index < 0);
4313+ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4314+}
4315+
4316+static inline void set_branch_count(struct super_block *sb, int index, int val)
4317+{
4318+ BUG_ON(!sb || index < 0);
4319+ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4320+}
4321+
4322+static inline void branchget(struct super_block *sb, int index)
4323+{
4324+ BUG_ON(!sb || index < 0);
4325+ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4326+}
4327+
4328+static inline void branchput(struct super_block *sb, int index)
4329+{
4330+ BUG_ON(!sb || index < 0);
4331+ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4332+}
4333+
4334+/* Dentry macros */
4335+static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4336+ struct dentry *val)
4337+{
4338+ BUG_ON(!dent || index < 0);
4339+ UNIONFS_D(dent)->lower_paths[index].dentry = val;
4340+}
4341+
4342+static inline struct dentry *unionfs_lower_dentry_idx(
4343+ const struct dentry *dent,
4344+ int index)
4345+{
4346+ BUG_ON(!dent || index < 0);
4347+ return UNIONFS_D(dent)->lower_paths[index].dentry;
4348+}
4349+
/* The lower dentry of @dent's starting branch. */
static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
{
	int first;

	BUG_ON(!dent);
	first = dbstart(dent);
	return unionfs_lower_dentry_idx(dent, first);
}
4355+
4356+static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4357+ struct vfsmount *mnt)
4358+{
4359+ BUG_ON(!dent || index < 0);
4360+ UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4361+}
4362+
4363+static inline struct vfsmount *unionfs_lower_mnt_idx(
4364+ const struct dentry *dent,
4365+ int index)
4366+{
4367+ BUG_ON(!dent || index < 0);
4368+ return UNIONFS_D(dent)->lower_paths[index].mnt;
4369+}
4370+
/* The lower vfsmount of @dent's starting branch. */
static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
{
	int first;

	BUG_ON(!dent);
	first = dbstart(dent);
	return unionfs_lower_mnt_idx(dent, first);
}
4376+
/*
 * Dentry locking.
 *
 * Subclass values handed to mutex_lock_nested() when taking a unionfs
 * dentry's lock.
 */
enum unionfs_dentry_lock_class {
	UNIONFS_DMUTEX_NORMAL,
	UNIONFS_DMUTEX_ROOT,
	UNIONFS_DMUTEX_PARENT,
	UNIONFS_DMUTEX_CHILD,
	UNIONFS_DMUTEX_WHITEOUT,
	UNIONFS_DMUTEX_REVAL_PARENT,	/* for file/dentry revalidate */
	UNIONFS_DMUTEX_REVAL_CHILD,	/* for file/dentry revalidate */
};
4387+
4388+static inline void unionfs_lock_dentry(struct dentry *d,
4389+ unsigned int subclass)
4390+{
4391+ BUG_ON(!d);
4392+ mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);
4393+}
4394+
4395+static inline void unionfs_unlock_dentry(struct dentry *d)
4396+{
4397+ BUG_ON(!d);
4398+ mutex_unlock(&UNIONFS_D(d)->lock);
4399+}
4400+
4401+static inline struct dentry *unionfs_lock_parent(struct dentry *d,
4402+ unsigned int subclass)
4403+{
4404+ struct dentry *p;
4405+
4406+ BUG_ON(!d);
4407+ p = dget_parent(d);
4408+ if (p != d)
4409+ mutex_lock_nested(&UNIONFS_D(p)->lock, subclass);
4410+ return p;
4411+}
4412+
4413+static inline void unionfs_unlock_parent(struct dentry *d, struct dentry *p)
4414+{
4415+ BUG_ON(!d);
4416+ BUG_ON(!p);
4417+ if (p != d) {
4418+ BUG_ON(!mutex_is_locked(&UNIONFS_D(p)->lock));
4419+ mutex_unlock(&UNIONFS_D(p)->lock);
4420+ }
4421+ dput(p);
4422+}
4423+
4424+static inline void verify_locked(struct dentry *d)
4425+{
4426+ BUG_ON(!d);
4427+ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4428+}
4429+
4430+/* macros to put lower objects */
4431+
4432+/*
4433+ * iput lower inodes of an unionfs dentry, from bstart to bend. If
4434+ * @free_lower is true, then also kfree the memory used to hold the lower
4435+ * object pointers.
4436+ */
4437+static inline void iput_lowers(struct inode *inode,
4438+ int bstart, int bend, bool free_lower)
4439+{
4440+ struct inode *lower_inode;
4441+ int bindex;
4442+
4443+ BUG_ON(!inode);
4444+ BUG_ON(!UNIONFS_I(inode));
4445+ BUG_ON(bstart < 0);
4446+
4447+ for (bindex = bstart; bindex <= bend; bindex++) {
4448+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4449+ if (lower_inode) {
4450+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
4451+ /* see Documentation/filesystems/unionfs/issues.txt */
4452+ lockdep_off();
4453+ iput(lower_inode);
4454+ lockdep_on();
4455+ }
4456+ }
4457+
4458+ if (free_lower) {
4459+ kfree(UNIONFS_I(inode)->lower_inodes);
4460+ UNIONFS_I(inode)->lower_inodes = NULL;
4461+ }
4462+}
4463+
4464+/* iput all lower inodes, and reset start/end branch indices to -1 */
4465+static inline void iput_lowers_all(struct inode *inode, bool free_lower)
4466+{
4467+ int bstart, bend;
4468+
4469+ BUG_ON(!inode);
4470+ BUG_ON(!UNIONFS_I(inode));
4471+ bstart = ibstart(inode);
4472+ bend = ibend(inode);
4473+ BUG_ON(bstart < 0);
4474+
4475+ iput_lowers(inode, bstart, bend, free_lower);
4476+ ibstart(inode) = ibend(inode) = -1;
4477+}
4478+
4479+/*
4480+ * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from
4481+ * bstart to bend. If @free_lower is true, then also kfree the memory used
4482+ * to hold the lower object pointers.
4483+ *
4484+ * XXX: implement using path_put VFS macros
4485+ */
4486+static inline void path_put_lowers(struct dentry *dentry,
4487+ int bstart, int bend, bool free_lower)
4488+{
4489+ struct dentry *lower_dentry;
4490+ struct vfsmount *lower_mnt;
4491+ int bindex;
4492+
4493+ BUG_ON(!dentry);
4494+ BUG_ON(!UNIONFS_D(dentry));
4495+ BUG_ON(bstart < 0);
4496+
4497+ for (bindex = bstart; bindex <= bend; bindex++) {
4498+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4499+ if (lower_dentry) {
4500+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
4501+ dput(lower_dentry);
4502+ }
4503+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
4504+ if (lower_mnt) {
4505+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
4506+ mntput(lower_mnt);
4507+ }
4508+ }
4509+
4510+ if (free_lower) {
4511+ kfree(UNIONFS_D(dentry)->lower_paths);
4512+ UNIONFS_D(dentry)->lower_paths = NULL;
4513+ }
4514+}
4515+
4516+/*
4517+ * dput/mntput all lower dentries and vfsmounts, and reset start/end branch
4518+ * indices to -1.
4519+ */
4520+static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower)
4521+{
4522+ int bstart, bend;
4523+
4524+ BUG_ON(!dentry);
4525+ BUG_ON(!UNIONFS_D(dentry));
4526+ bstart = dbstart(dentry);
4527+ bend = dbend(dentry);
4528+ BUG_ON(bstart < 0);
4529+
4530+ path_put_lowers(dentry, bstart, bend, free_lower);
4531+ dbstart(dentry) = dbend(dentry) = -1;
4532+}
4533+
4534+#endif /* not _FANOUT_H_ */
4535diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4536new file mode 100644
4537index 0000000..416c52f
4538--- /dev/null
4539+++ b/fs/unionfs/file.c
4540@@ -0,0 +1,382 @@
4541+/*
4542+ * Copyright (c) 2003-2011 Erez Zadok
4543+ * Copyright (c) 2003-2006 Charles P. Wright
4544+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4545+ * Copyright (c) 2005-2006 Junjiro Okajima
4546+ * Copyright (c) 2005 Arun M. Krishnakumar
4547+ * Copyright (c) 2004-2006 David P. Quigley
4548+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4549+ * Copyright (c) 2003 Puja Gupta
4550+ * Copyright (c) 2003 Harikesavan Krishnan
4551+ * Copyright (c) 2003-2011 Stony Brook University
4552+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
4553+ *
4554+ * This program is free software; you can redistribute it and/or modify
4555+ * it under the terms of the GNU General Public License version 2 as
4556+ * published by the Free Software Foundation.
4557+ */
4558+
4559+#include "union.h"
4560+
4561+static ssize_t unionfs_read(struct file *file, char __user *buf,
4562+ size_t count, loff_t *ppos)
4563+{
4564+ int err;
4565+ struct file *lower_file;
4566+ struct dentry *dentry = file->f_path.dentry;
4567+ struct dentry *parent;
4568+
4569+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4570+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4571+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4572+
4573+ err = unionfs_file_revalidate(file, parent, false);
4574+ if (unlikely(err))
4575+ goto out;
4576+
4577+ lower_file = unionfs_lower_file(file);
4578+ err = vfs_read(lower_file, buf, count, ppos);
4579+ /* update our inode atime upon a successful lower read */
4580+ if (err >= 0) {
4581+ fsstack_copy_attr_atime(dentry->d_inode,
4582+ lower_file->f_path.dentry->d_inode);
4583+ unionfs_check_file(file);
4584+ }
4585+
4586+out:
4587+ unionfs_unlock_dentry(dentry);
4588+ unionfs_unlock_parent(dentry, parent);
4589+ unionfs_read_unlock(dentry->d_sb);
4590+ return err;
4591+}
4592+
4593+static ssize_t unionfs_write(struct file *file, const char __user *buf,
4594+ size_t count, loff_t *ppos)
4595+{
4596+ int err = 0;
4597+ struct file *lower_file;
4598+ struct dentry *dentry = file->f_path.dentry;
4599+ struct dentry *parent;
4600+
4601+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4602+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4603+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4604+
4605+ err = unionfs_file_revalidate(file, parent, true);
4606+ if (unlikely(err))
4607+ goto out;
4608+
4609+ lower_file = unionfs_lower_file(file);
4610+ err = vfs_write(lower_file, buf, count, ppos);
4611+ /* update our inode times+sizes upon a successful lower write */
4612+ if (err >= 0) {
4613+ fsstack_copy_inode_size(dentry->d_inode,
4614+ lower_file->f_path.dentry->d_inode);
4615+ fsstack_copy_attr_times(dentry->d_inode,
4616+ lower_file->f_path.dentry->d_inode);
4617+ UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */
4618+ unionfs_check_file(file);
4619+ }
4620+
4621+out:
4622+ unionfs_unlock_dentry(dentry);
4623+ unionfs_unlock_parent(dentry, parent);
4624+ unionfs_read_unlock(dentry->d_sb);
4625+ return err;
4626+}
4627+
4628+static int unionfs_file_readdir(struct file *file, void *dirent,
4629+ filldir_t filldir)
4630+{
4631+ return -ENOTDIR;
4632+}
4633+
4634+static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4635+{
4636+ int err = 0;
4637+ bool willwrite;
4638+ struct file *lower_file;
4639+ struct dentry *dentry = file->f_path.dentry;
4640+ struct dentry *parent;
4641+ const struct vm_operations_struct *saved_vm_ops = NULL;
4642+
4643+ /*
4644+ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
4645+ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
4646+ * has been causing false positives in file system stacking layers.
4647+ * In particular, our ->mmap is called after sys_mmap2 already holds
4648+ * mmap_sem, then we lock our own mutexes; but earlier, it's
4649+ * possible for lockdep to have locked our mutexes first, and then
4650+ * we call a lower ->readdir which could call might_fault. The
4651+ * different ordering of the locks is what lockdep complains about
4652+ * -- unnecessarily. Therefore, we have no choice but to tell
4653+ * lockdep to temporarily turn off lockdep here. Note: the comments
4654+ * inside might_sleep also suggest that it would have been
4655+ * nicer to only annotate paths that needs that might_lock_read.
4656+ */
4657+ lockdep_off();
4658+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4659+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4660+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4661+
4662+ /* This might be deferred to mmap's writepage */
4663+ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4664+ err = unionfs_file_revalidate(file, parent, willwrite);
4665+ if (unlikely(err))
4666+ goto out;
4667+ unionfs_check_file(file);
4668+
4669+ /*
4670+ * File systems which do not implement ->writepage may use
4671+ * generic_file_readonly_mmap as their ->mmap op. If you call
4672+ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4673+ * But we cannot call the lower ->mmap op, so we can't tell that
4674+ * writeable mappings won't work. Therefore, our only choice is to
4675+ * check if the lower file system supports the ->writepage, and if
4676+ * not, return EINVAL (the same error that
4677+ * generic_file_readonly_mmap returns in that case).
4678+ */
4679+ lower_file = unionfs_lower_file(file);
4680+ if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4681+ err = -EINVAL;
4682+ printk(KERN_ERR "unionfs: branch %d file system does not "
4683+ "support writeable mmap\n", fbstart(file));
4684+ goto out;
4685+ }
4686+
4687+ /*
4688+ * find and save lower vm_ops.
4689+ *
4690+ * XXX: the VFS should have a cleaner way of finding the lower vm_ops
4691+ */
4692+ if (!UNIONFS_F(file)->lower_vm_ops) {
4693+ err = lower_file->f_op->mmap(lower_file, vma);
4694+ if (err) {
4695+ printk(KERN_ERR "unionfs: lower mmap failed %d\n", err);
4696+ goto out;
4697+ }
4698+ saved_vm_ops = vma->vm_ops;
4699+ err = do_munmap(current->mm, vma->vm_start,
4700+ vma->vm_end - vma->vm_start);
4701+ if (err) {
4702+ printk(KERN_ERR "unionfs: do_munmap failed %d\n", err);
4703+ goto out;
4704+ }
4705+ }
4706+
4707+ file->f_mapping->a_ops = &unionfs_dummy_aops;
4708+ err = generic_file_mmap(file, vma);
4709+ file->f_mapping->a_ops = &unionfs_aops;
4710+ if (err) {
4711+ printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err);
4712+ goto out;
4713+ }
4714+ vma->vm_ops = &unionfs_vm_ops;
4715+ if (!UNIONFS_F(file)->lower_vm_ops)
4716+ UNIONFS_F(file)->lower_vm_ops = saved_vm_ops;
4717+
4718+out:
4719+ if (!err) {
4720+ /* copyup could cause parent dir times to change */
4721+ unionfs_copy_attr_times(parent->d_inode);
4722+ unionfs_check_file(file);
4723+ }
4724+ unionfs_unlock_dentry(dentry);
4725+ unionfs_unlock_parent(dentry, parent);
4726+ unionfs_read_unlock(dentry->d_sb);
4727+ lockdep_on();
4728+ return err;
4729+}
4730+
4731+int unionfs_fsync(struct file *file, int datasync)
4732+{
4733+ int bindex, bstart, bend;
4734+ struct file *lower_file;
4735+ struct dentry *dentry = file->f_path.dentry;
4736+ struct dentry *lower_dentry;
4737+ struct dentry *parent;
4738+ struct inode *lower_inode, *inode;
4739+ int err = -EINVAL;
4740+
4741+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4742+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4743+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4744+
4745+ err = unionfs_file_revalidate(file, parent, true);
4746+ if (unlikely(err))
4747+ goto out;
4748+ unionfs_check_file(file);
4749+
4750+ bstart = fbstart(file);
4751+ bend = fbend(file);
4752+ if (bstart < 0 || bend < 0)
4753+ goto out;
4754+
4755+ inode = dentry->d_inode;
4756+ if (unlikely(!inode)) {
4757+ printk(KERN_ERR
4758+ "unionfs: null lower inode in unionfs_fsync\n");
4759+ goto out;
4760+ }
4761+ for (bindex = bstart; bindex <= bend; bindex++) {
4762+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4763+ if (!lower_inode || !lower_inode->i_fop->fsync)
4764+ continue;
4765+ lower_file = unionfs_lower_file_idx(file, bindex);
4766+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4767+ mutex_lock(&lower_inode->i_mutex);
4768+ err = lower_inode->i_fop->fsync(lower_file, datasync);
4769+ if (!err && bindex == bstart)
4770+ fsstack_copy_attr_times(inode, lower_inode);
4771+ mutex_unlock(&lower_inode->i_mutex);
4772+ if (err)
4773+ goto out;
4774+ }
4775+
4776+out:
4777+ if (!err)
4778+ unionfs_check_file(file);
4779+ unionfs_unlock_dentry(dentry);
4780+ unionfs_unlock_parent(dentry, parent);
4781+ unionfs_read_unlock(dentry->d_sb);
4782+ return err;
4783+}
4784+
4785+int unionfs_fasync(int fd, struct file *file, int flag)
4786+{
4787+ int bindex, bstart, bend;
4788+ struct file *lower_file;
4789+ struct dentry *dentry = file->f_path.dentry;
4790+ struct dentry *parent;
4791+ struct inode *lower_inode, *inode;
4792+ int err = 0;
4793+
4794+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4795+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4796+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4797+
4798+ err = unionfs_file_revalidate(file, parent, true);
4799+ if (unlikely(err))
4800+ goto out;
4801+ unionfs_check_file(file);
4802+
4803+ bstart = fbstart(file);
4804+ bend = fbend(file);
4805+ if (bstart < 0 || bend < 0)
4806+ goto out;
4807+
4808+ inode = dentry->d_inode;
4809+ if (unlikely(!inode)) {
4810+ printk(KERN_ERR
4811+ "unionfs: null lower inode in unionfs_fasync\n");
4812+ goto out;
4813+ }
4814+ for (bindex = bstart; bindex <= bend; bindex++) {
4815+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4816+ if (!lower_inode || !lower_inode->i_fop->fasync)
4817+ continue;
4818+ lower_file = unionfs_lower_file_idx(file, bindex);
4819+ mutex_lock(&lower_inode->i_mutex);
4820+ err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4821+ if (!err && bindex == bstart)
4822+ fsstack_copy_attr_times(inode, lower_inode);
4823+ mutex_unlock(&lower_inode->i_mutex);
4824+ if (err)
4825+ goto out;
4826+ }
4827+
4828+out:
4829+ if (!err)
4830+ unionfs_check_file(file);
4831+ unionfs_unlock_dentry(dentry);
4832+ unionfs_unlock_parent(dentry, parent);
4833+ unionfs_read_unlock(dentry->d_sb);
4834+ return err;
4835+}
4836+
4837+static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos,
4838+ struct pipe_inode_info *pipe, size_t len,
4839+ unsigned int flags)
4840+{
4841+ ssize_t err;
4842+ struct file *lower_file;
4843+ struct dentry *dentry = file->f_path.dentry;
4844+ struct dentry *parent;
4845+
4846+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4847+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4848+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4849+
4850+ err = unionfs_file_revalidate(file, parent, false);
4851+ if (unlikely(err))
4852+ goto out;
4853+
4854+ lower_file = unionfs_lower_file(file);
4855+ err = vfs_splice_to(lower_file, ppos, pipe, len, flags);
4856+ /* update our inode atime upon a successful lower splice-read */
4857+ if (err >= 0) {
4858+ fsstack_copy_attr_atime(dentry->d_inode,
4859+ lower_file->f_path.dentry->d_inode);
4860+ unionfs_check_file(file);
4861+ }
4862+
4863+out:
4864+ unionfs_unlock_dentry(dentry);
4865+ unionfs_unlock_parent(dentry, parent);
4866+ unionfs_read_unlock(dentry->d_sb);
4867+ return err;
4868+}
4869+
4870+static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe,
4871+ struct file *file, loff_t *ppos,
4872+ size_t len, unsigned int flags)
4873+{
4874+ ssize_t err = 0;
4875+ struct file *lower_file;
4876+ struct dentry *dentry = file->f_path.dentry;
4877+ struct dentry *parent;
4878+
4879+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4880+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4881+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4882+
4883+ err = unionfs_file_revalidate(file, parent, true);
4884+ if (unlikely(err))
4885+ goto out;
4886+
4887+ lower_file = unionfs_lower_file(file);
4888+ err = vfs_splice_from(pipe, lower_file, ppos, len, flags);
4889+ /* update our inode times+sizes upon a successful lower write */
4890+ if (err >= 0) {
4891+ fsstack_copy_inode_size(dentry->d_inode,
4892+ lower_file->f_path.dentry->d_inode);
4893+ fsstack_copy_attr_times(dentry->d_inode,
4894+ lower_file->f_path.dentry->d_inode);
4895+ unionfs_check_file(file);
4896+ }
4897+
4898+out:
4899+ unionfs_unlock_dentry(dentry);
4900+ unionfs_unlock_parent(dentry, parent);
4901+ unionfs_read_unlock(dentry->d_sb);
4902+ return err;
4903+}
4904+
4905+struct file_operations unionfs_main_fops = {
4906+ .llseek = generic_file_llseek,
4907+ .read = unionfs_read,
4908+ .write = unionfs_write,
4909+ .readdir = unionfs_file_readdir,
4910+ .unlocked_ioctl = unionfs_ioctl,
4911+#ifdef CONFIG_COMPAT
4912+ .compat_ioctl = unionfs_ioctl,
4913+#endif
4914+ .mmap = unionfs_mmap,
4915+ .open = unionfs_open,
4916+ .flush = unionfs_flush,
4917+ .release = unionfs_file_release,
4918+ .fsync = unionfs_fsync,
4919+ .fasync = unionfs_fasync,
4920+ .splice_read = unionfs_splice_read,
4921+ .splice_write = unionfs_splice_write,
4922+};
4923diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4924new file mode 100644
4925index 0000000..b207c13
4926--- /dev/null
4927+++ b/fs/unionfs/inode.c
4928@@ -0,0 +1,1099 @@
4929+/*
4930+ * Copyright (c) 2003-2011 Erez Zadok
4931+ * Copyright (c) 2003-2006 Charles P. Wright
4932+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4933+ * Copyright (c) 2005-2006 Junjiro Okajima
4934+ * Copyright (c) 2005 Arun M. Krishnakumar
4935+ * Copyright (c) 2004-2006 David P. Quigley
4936+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4937+ * Copyright (c) 2003 Puja Gupta
4938+ * Copyright (c) 2003 Harikesavan Krishnan
4939+ * Copyright (c) 2003-2011 Stony Brook University
4940+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
4941+ *
4942+ * This program is free software; you can redistribute it and/or modify
4943+ * it under the terms of the GNU General Public License version 2 as
4944+ * published by the Free Software Foundation.
4945+ */
4946+
4947+#include "union.h"
4948+
4949+/*
4950+ * Find a writeable branch to create new object in. Checks all writeble
4951+ * branches of the parent inode, from istart to iend order; if none are
4952+ * suitable, also tries branch 0 (which may require a copyup).
4953+ *
4954+ * Return a lower_dentry we can use to create object in, or ERR_PTR.
4955+ */
4956+static struct dentry *find_writeable_branch(struct inode *parent,
4957+ struct dentry *dentry)
4958+{
4959+ int err = -EINVAL;
4960+ int bindex, istart, iend;
4961+ struct dentry *lower_dentry = NULL;
4962+
4963+ istart = ibstart(parent);
4964+ iend = ibend(parent);
4965+ if (istart < 0)
4966+ goto out;
4967+
4968+begin:
4969+ for (bindex = istart; bindex <= iend; bindex++) {
4970+ /* skip non-writeable branches */
4971+ err = is_robranch_super(dentry->d_sb, bindex);
4972+ if (err) {
4973+ err = -EROFS;
4974+ continue;
4975+ }
4976+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4977+ if (!lower_dentry)
4978+ continue;
4979+ /*
4980+ * check for whiteouts in writeable branch, and remove them
4981+ * if necessary.
4982+ */
4983+ err = check_unlink_whiteout(dentry, lower_dentry, bindex);
4984+ if (err > 0) /* ignore if whiteout found and removed */
4985+ err = 0;
4986+ if (err)
4987+ continue;
4988+ /* if get here, we can write to the branch */
4989+ break;
4990+ }
4991+ /*
4992+ * If istart wasn't already branch 0, and we got any error, then try
4993+ * branch 0 (which may require copyup)
4994+ */
4995+ if (err && istart > 0) {
4996+ istart = iend = 0;
4997+ goto begin;
4998+ }
4999+
5000+ /*
5001+ * If we tried even branch 0, and still got an error, abort. But if
5002+ * the error was an EROFS, then we should try to copyup.
5003+ */
5004+ if (err && err != -EROFS)
5005+ goto out;
5006+
5007+ /*
5008+ * If we get here, then check if copyup needed. If lower_dentry is
5009+ * NULL, create the entire dentry directory structure in branch 0.
5010+ */
5011+ if (!lower_dentry) {
5012+ bindex = 0;
5013+ lower_dentry = create_parents(parent, dentry,
5014+ dentry->d_name.name, bindex);
5015+ if (IS_ERR(lower_dentry)) {
5016+ err = PTR_ERR(lower_dentry);
5017+ goto out;
5018+ }
5019+ }
5020+ err = 0; /* all's well */
5021+out:
5022+ if (err)
5023+ return ERR_PTR(err);
5024+ return lower_dentry;
5025+}
5026+
5027+static int unionfs_create(struct inode *dir, struct dentry *dentry,
5028+ int mode, struct nameidata *nd_unused)
5029+{
5030+ int err = 0;
5031+ struct dentry *lower_dentry = NULL;
5032+ struct dentry *lower_parent_dentry = NULL;
5033+ struct dentry *parent;
5034+ int valid = 0;
5035+ struct nameidata lower_nd;
5036+
5037+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5038+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5039+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5040+
5041+ valid = __unionfs_d_revalidate(dentry, parent, false);
5042+ if (unlikely(!valid)) {
5043+ err = -ESTALE; /* same as what real_lookup does */
5044+ goto out;
5045+ }
5046+
5047+ lower_dentry = find_writeable_branch(dir, dentry);
5048+ if (IS_ERR(lower_dentry)) {
5049+ err = PTR_ERR(lower_dentry);
5050+ goto out;
5051+ }
5052+
5053+ lower_parent_dentry = lock_parent(lower_dentry);
5054+ if (IS_ERR(lower_parent_dentry)) {
5055+ err = PTR_ERR(lower_parent_dentry);
5056+ goto out_unlock;
5057+ }
5058+
5059+ err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
5060+ if (unlikely(err < 0))
5061+ goto out_unlock;
5062+ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
5063+ &lower_nd);
5064+ release_lower_nd(&lower_nd, err);
5065+
5066+ if (!err) {
5067+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5068+ if (!err) {
5069+ unionfs_copy_attr_times(dir);
5070+ fsstack_copy_inode_size(dir,
5071+ lower_parent_dentry->d_inode);
5072+ /* update no. of links on parent directory */
5073+ dir->i_nlink = unionfs_get_nlinks(dir);
5074+ }
5075+ }
5076+
5077+out_unlock:
5078+ unlock_dir(lower_parent_dentry);
5079+out:
5080+ if (!err) {
5081+ unionfs_postcopyup_setmnt(dentry);
5082+ unionfs_check_inode(dir);
5083+ unionfs_check_dentry(dentry);
5084+ }
5085+ unionfs_unlock_dentry(dentry);
5086+ unionfs_unlock_parent(dentry, parent);
5087+ unionfs_read_unlock(dentry->d_sb);
5088+ return err;
5089+}
5090+
5091+/*
5092+ * unionfs_lookup is the only special function which takes a dentry, yet we
5093+ * do NOT want to call __unionfs_d_revalidate_chain because by definition,
5094+ * we don't have a valid dentry here yet.
5095+ */
5096+static struct dentry *unionfs_lookup(struct inode *dir,
5097+ struct dentry *dentry,
5098+ struct nameidata *nd_unused)
5099+{
5100+ struct dentry *ret, *parent;
5101+ int err = 0;
5102+
5103+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5104+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5105+
5106+ /*
5107+ * As long as we lock/dget the parent, then can skip validating the
5108+ * parent now; we may have to rebuild this dentry on the next
5109+ * ->d_revalidate, however.
5110+ */
5111+
5112+ /* allocate dentry private data. We free it in ->d_release */
5113+ err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
5114+ if (unlikely(err)) {
5115+ ret = ERR_PTR(err);
5116+ goto out;
5117+ }
5118+
5119+ ret = unionfs_lookup_full(dentry, parent, INTERPOSE_LOOKUP);
5120+
5121+ if (!IS_ERR(ret)) {
5122+ if (ret)
5123+ dentry = ret;
5124+ /* lookup_full can return multiple positive dentries */
5125+ if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
5126+ BUG_ON(dbstart(dentry) < 0);
5127+ unionfs_postcopyup_release(dentry);
5128+ }
5129+ unionfs_copy_attr_times(dentry->d_inode);
5130+ }
5131+
5132+ unionfs_check_inode(dir);
5133+ if (!IS_ERR(ret))
5134+ unionfs_check_dentry(dentry);
5135+ unionfs_check_dentry(parent);
5136+ unionfs_unlock_dentry(dentry); /* locked in new_dentry_private data */
5137+
5138+out:
5139+ unionfs_unlock_parent(dentry, parent);
5140+ unionfs_read_unlock(dentry->d_sb);
5141+
5142+ return ret;
5143+}
5144+
5145+static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
5146+ struct dentry *new_dentry)
5147+{
5148+ int err = 0;
5149+ struct dentry *lower_old_dentry = NULL;
5150+ struct dentry *lower_new_dentry = NULL;
5151+ struct dentry *lower_dir_dentry = NULL;
5152+ struct dentry *old_parent, *new_parent;
5153+ char *name = NULL;
5154+ bool valid;
5155+
5156+ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5157+ old_parent = dget_parent(old_dentry);
5158+ new_parent = dget_parent(new_dentry);
5159+ unionfs_double_lock_parents(old_parent, new_parent);
5160+ unionfs_double_lock_dentry(old_dentry, new_dentry);
5161+
5162+ valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
5163+ if (unlikely(!valid)) {
5164+ err = -ESTALE;
5165+ goto out;
5166+ }
5167+ if (new_dentry->d_inode) {
5168+ valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
5169+ if (unlikely(!valid)) {
5170+ err = -ESTALE;
5171+ goto out;
5172+ }
5173+ }
5174+
5175+ lower_new_dentry = unionfs_lower_dentry(new_dentry);
5176+
5177+ /* check for a whiteout in new dentry branch, and delete it */
5178+ err = check_unlink_whiteout(new_dentry, lower_new_dentry,
5179+ dbstart(new_dentry));
5180+ if (err > 0) { /* whiteout found and removed successfully */
5181+ lower_dir_dentry = dget_parent(lower_new_dentry);
5182+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5183+ dput(lower_dir_dentry);
5184+ dir->i_nlink = unionfs_get_nlinks(dir);
5185+ err = 0;
5186+ }
5187+ if (err)
5188+ goto out;
5189+
5190+ /* check if parent hierachy is needed, then link in same branch */
5191+ if (dbstart(old_dentry) != dbstart(new_dentry)) {
5192+ lower_new_dentry = create_parents(dir, new_dentry,
5193+ new_dentry->d_name.name,
5194+ dbstart(old_dentry));
5195+ err = PTR_ERR(lower_new_dentry);
5196+ if (IS_COPYUP_ERR(err))
5197+ goto docopyup;
5198+ if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5199+ goto out;
5200+ }
5201+ lower_new_dentry = unionfs_lower_dentry(new_dentry);
5202+ lower_old_dentry = unionfs_lower_dentry(old_dentry);
5203+
5204+ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5205+ lower_dir_dentry = lock_parent(lower_new_dentry);
5206+ err = is_robranch(old_dentry);
5207+ if (!err) {
5208+ /* see Documentation/filesystems/unionfs/issues.txt */
5209+ lockdep_off();
5210+ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5211+ lower_new_dentry);
5212+ lockdep_on();
5213+ }
5214+ unlock_dir(lower_dir_dentry);
5215+
5216+docopyup:
5217+ if (IS_COPYUP_ERR(err)) {
5218+ int old_bstart = dbstart(old_dentry);
5219+ int bindex;
5220+
5221+ for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
5222+ err = copyup_dentry(old_parent->d_inode,
5223+ old_dentry, old_bstart,
5224+ bindex, old_dentry->d_name.name,
5225+ old_dentry->d_name.len, NULL,
5226+ i_size_read(old_dentry->d_inode));
5227+ if (err)
5228+ continue;
5229+ lower_new_dentry =
5230+ create_parents(dir, new_dentry,
5231+ new_dentry->d_name.name,
5232+ bindex);
5233+ lower_old_dentry = unionfs_lower_dentry(old_dentry);
5234+ lower_dir_dentry = lock_parent(lower_new_dentry);
5235+ /* see Documentation/filesystems/unionfs/issues.txt */
5236+ lockdep_off();
5237+ /* do vfs_link */
5238+ err = vfs_link(lower_old_dentry,
5239+ lower_dir_dentry->d_inode,
5240+ lower_new_dentry);
5241+ lockdep_on();
5242+ unlock_dir(lower_dir_dentry);
5243+ goto check_link;
5244+ }
5245+ goto out;
5246+ }
5247+
5248+check_link:
5249+ if (err || !lower_new_dentry->d_inode)
5250+ goto out;
5251+
5252+ /* Its a hard link, so use the same inode */
5253+ new_dentry->d_inode = igrab(old_dentry->d_inode);
5254+ d_add(new_dentry, new_dentry->d_inode);
5255+ unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5256+ fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5257+
5258+ /* propagate number of hard-links */
5259+ old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5260+ /* new dentry's ctime may have changed due to hard-link counts */
5261+ unionfs_copy_attr_times(new_dentry->d_inode);
5262+
5263+out:
5264+ if (!new_dentry->d_inode)
5265+ d_drop(new_dentry);
5266+
5267+ kfree(name);
5268+ if (!err)
5269+ unionfs_postcopyup_setmnt(new_dentry);
5270+
5271+ unionfs_check_inode(dir);
5272+ unionfs_check_dentry(new_dentry);
5273+ unionfs_check_dentry(old_dentry);
5274+
5275+ unionfs_double_unlock_dentry(old_dentry, new_dentry);
5276+ unionfs_double_unlock_parents(old_parent, new_parent);
5277+ dput(new_parent);
5278+ dput(old_parent);
5279+ unionfs_read_unlock(old_dentry->d_sb);
5280+
5281+ return err;
5282+}
5283+
5284+static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5285+ const char *symname)
5286+{
5287+ int err = 0;
5288+ struct dentry *lower_dentry = NULL;
5289+ struct dentry *wh_dentry = NULL;
5290+ struct dentry *lower_parent_dentry = NULL;
5291+ struct dentry *parent;
5292+ char *name = NULL;
5293+ int valid = 0;
5294+ umode_t mode;
5295+
5296+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5297+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5298+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5299+
5300+ valid = __unionfs_d_revalidate(dentry, parent, false);
5301+ if (unlikely(!valid)) {
5302+ err = -ESTALE;
5303+ goto out;
5304+ }
5305+
5306+ /*
5307+ * It's only a bug if this dentry was not negative and couldn't be
5308+ * revalidated (shouldn't happen).
5309+ */
5310+ BUG_ON(!valid && dentry->d_inode);
5311+
5312+ lower_dentry = find_writeable_branch(dir, dentry);
5313+ if (IS_ERR(lower_dentry)) {
5314+ err = PTR_ERR(lower_dentry);
5315+ goto out;
5316+ }
5317+
5318+ lower_parent_dentry = lock_parent(lower_dentry);
5319+ if (IS_ERR(lower_parent_dentry)) {
5320+ err = PTR_ERR(lower_parent_dentry);
5321+ goto out_unlock;
5322+ }
5323+
5324+ mode = S_IALLUGO;
5325+ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
5326+ if (!err) {
5327+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5328+ if (!err) {
5329+ unionfs_copy_attr_times(dir);
5330+ fsstack_copy_inode_size(dir,
5331+ lower_parent_dentry->d_inode);
5332+ /* update no. of links on parent directory */
5333+ dir->i_nlink = unionfs_get_nlinks(dir);
5334+ }
5335+ }
5336+
5337+out_unlock:
5338+ unlock_dir(lower_parent_dentry);
5339+out:
5340+ dput(wh_dentry);
5341+ kfree(name);
5342+
5343+ if (!err) {
5344+ unionfs_postcopyup_setmnt(dentry);
5345+ unionfs_check_inode(dir);
5346+ unionfs_check_dentry(dentry);
5347+ }
5348+ unionfs_unlock_dentry(dentry);
5349+ unionfs_unlock_parent(dentry, parent);
5350+ unionfs_read_unlock(dentry->d_sb);
5351+ return err;
5352+}
5353+
5354+static int unionfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
5355+{
5356+ int err = 0;
5357+ struct dentry *lower_dentry = NULL;
5358+ struct dentry *lower_parent_dentry = NULL;
5359+ struct dentry *parent;
5360+ int bindex = 0, bstart;
5361+ char *name = NULL;
5362+ int valid;
5363+
5364+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5365+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5366+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5367+
5368+ valid = __unionfs_d_revalidate(dentry, parent, false);
5369+ if (unlikely(!valid)) {
5370+ err = -ESTALE; /* same as what real_lookup does */
5371+ goto out;
5372+ }
5373+
5374+ bstart = dbstart(dentry);
5375+
5376+ lower_dentry = unionfs_lower_dentry(dentry);
5377+
5378+ /* check for a whiteout in new dentry branch, and delete it */
5379+ err = check_unlink_whiteout(dentry, lower_dentry, bstart);
5380+ if (err > 0) /* whiteout found and removed successfully */
5381+ err = 0;
5382+ if (err) {
5383+ /* exit if the error returned was NOT -EROFS */
5384+ if (!IS_COPYUP_ERR(err))
5385+ goto out;
5386+ bstart--;
5387+ }
5388+
5389+ /* check if copyup's needed, and mkdir */
5390+ for (bindex = bstart; bindex >= 0; bindex--) {
5391+ int i;
5392+ int bend = dbend(dentry);
5393+
5394+ if (is_robranch_super(dentry->d_sb, bindex))
5395+ continue;
5396+
5397+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5398+ if (!lower_dentry) {
5399+ lower_dentry = create_parents(dir, dentry,
5400+ dentry->d_name.name,
5401+ bindex);
5402+ if (!lower_dentry || IS_ERR(lower_dentry)) {
5403+ printk(KERN_ERR "unionfs: lower dentry "
5404+ " NULL for bindex = %d\n", bindex);
5405+ continue;
5406+ }
5407+ }
5408+
5409+ lower_parent_dentry = lock_parent(lower_dentry);
5410+
5411+ if (IS_ERR(lower_parent_dentry)) {
5412+ err = PTR_ERR(lower_parent_dentry);
5413+ goto out;
5414+ }
5415+
5416+ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5417+ mode);
5418+
5419+ unlock_dir(lower_parent_dentry);
5420+
5421+ /* did the mkdir succeed? */
5422+ if (err)
5423+ break;
5424+
5425+ for (i = bindex + 1; i <= bend; i++) {
5426+ /* XXX: use path_put_lowers? */
5427+ if (unionfs_lower_dentry_idx(dentry, i)) {
5428+ dput(unionfs_lower_dentry_idx(dentry, i));
5429+ unionfs_set_lower_dentry_idx(dentry, i, NULL);
5430+ }
5431+ }
5432+ dbend(dentry) = bindex;
5433+
5434+ /*
5435+ * Only INTERPOSE_LOOKUP can return a value other than 0 on
5436+ * err.
5437+ */
5438+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5439+ if (!err) {
5440+ unionfs_copy_attr_times(dir);
5441+ fsstack_copy_inode_size(dir,
5442+ lower_parent_dentry->d_inode);
5443+
5444+ /* update number of links on parent directory */
5445+ dir->i_nlink = unionfs_get_nlinks(dir);
5446+ }
5447+
5448+ err = make_dir_opaque(dentry, dbstart(dentry));
5449+ if (err) {
5450+ printk(KERN_ERR "unionfs: mkdir: error creating "
5451+ ".wh.__dir_opaque: %d\n", err);
5452+ goto out;
5453+ }
5454+
5455+ /* we are done! */
5456+ break;
5457+ }
5458+
5459+out:
5460+ if (!dentry->d_inode)
5461+ d_drop(dentry);
5462+
5463+ kfree(name);
5464+
5465+ if (!err) {
5466+ unionfs_copy_attr_times(dentry->d_inode);
5467+ unionfs_postcopyup_setmnt(dentry);
5468+ }
5469+ unionfs_check_inode(dir);
5470+ unionfs_check_dentry(dentry);
5471+ unionfs_unlock_dentry(dentry);
5472+ unionfs_unlock_parent(dentry, parent);
5473+ unionfs_read_unlock(dentry->d_sb);
5474+
5475+ return err;
5476+}
5477+
5478+static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5479+ dev_t dev)
5480+{
5481+ int err = 0;
5482+ struct dentry *lower_dentry = NULL;
5483+ struct dentry *wh_dentry = NULL;
5484+ struct dentry *lower_parent_dentry = NULL;
5485+ struct dentry *parent;
5486+ char *name = NULL;
5487+ int valid = 0;
5488+
5489+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5490+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5491+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5492+
5493+ valid = __unionfs_d_revalidate(dentry, parent, false);
5494+ if (unlikely(!valid)) {
5495+ err = -ESTALE;
5496+ goto out;
5497+ }
5498+
5499+ /*
5500+ * It's only a bug if this dentry was not negative and couldn't be
5501+ * revalidated (shouldn't happen).
5502+ */
5503+ BUG_ON(!valid && dentry->d_inode);
5504+
5505+ lower_dentry = find_writeable_branch(dir, dentry);
5506+ if (IS_ERR(lower_dentry)) {
5507+ err = PTR_ERR(lower_dentry);
5508+ goto out;
5509+ }
5510+
5511+ lower_parent_dentry = lock_parent(lower_dentry);
5512+ if (IS_ERR(lower_parent_dentry)) {
5513+ err = PTR_ERR(lower_parent_dentry);
5514+ goto out_unlock;
5515+ }
5516+
5517+ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5518+ if (!err) {
5519+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5520+ if (!err) {
5521+ unionfs_copy_attr_times(dir);
5522+ fsstack_copy_inode_size(dir,
5523+ lower_parent_dentry->d_inode);
5524+ /* update no. of links on parent directory */
5525+ dir->i_nlink = unionfs_get_nlinks(dir);
5526+ }
5527+ }
5528+
5529+out_unlock:
5530+ unlock_dir(lower_parent_dentry);
5531+out:
5532+ dput(wh_dentry);
5533+ kfree(name);
5534+
5535+ if (!err) {
5536+ unionfs_postcopyup_setmnt(dentry);
5537+ unionfs_check_inode(dir);
5538+ unionfs_check_dentry(dentry);
5539+ }
5540+ unionfs_unlock_dentry(dentry);
5541+ unionfs_unlock_parent(dentry, parent);
5542+ unionfs_read_unlock(dentry->d_sb);
5543+ return err;
5544+}
5545+
5546+/* requires sb, dentry, and parent to already be locked */
5547+static int __unionfs_readlink(struct dentry *dentry, char __user *buf,
5548+ int bufsiz)
5549+{
5550+ int err;
5551+ struct dentry *lower_dentry;
5552+
5553+ lower_dentry = unionfs_lower_dentry(dentry);
5554+
5555+ if (!lower_dentry->d_inode->i_op ||
5556+ !lower_dentry->d_inode->i_op->readlink) {
5557+ err = -EINVAL;
5558+ goto out;
5559+ }
5560+
5561+ err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5562+ buf, bufsiz);
5563+ if (err >= 0)
5564+ fsstack_copy_attr_atime(dentry->d_inode,
5565+ lower_dentry->d_inode);
5566+
5567+out:
5568+ return err;
5569+}
5570+
5571+static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5572+ int bufsiz)
5573+{
5574+ int err;
5575+ struct dentry *parent;
5576+
5577+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5578+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5579+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5580+
5581+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5582+ err = -ESTALE;
5583+ goto out;
5584+ }
5585+
5586+ err = __unionfs_readlink(dentry, buf, bufsiz);
5587+
5588+out:
5589+ unionfs_check_dentry(dentry);
5590+ unionfs_unlock_dentry(dentry);
5591+ unionfs_unlock_parent(dentry, parent);
5592+ unionfs_read_unlock(dentry->d_sb);
5593+
5594+ return err;
5595+}
5596+
5597+static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5598+{
5599+ char *buf;
5600+ int len = PAGE_SIZE, err;
5601+ mm_segment_t old_fs;
5602+ struct dentry *parent;
5603+
5604+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5605+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5606+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5607+
5608+ /* This is freed by the put_link method assuming a successful call. */
5609+ buf = kmalloc(len, GFP_KERNEL);
5610+ if (unlikely(!buf)) {
5611+ err = -ENOMEM;
5612+ goto out;
5613+ }
5614+
5615+ /* read the symlink, and then we will follow it */
5616+ old_fs = get_fs();
5617+ set_fs(KERNEL_DS);
5618+ err = __unionfs_readlink(dentry, buf, len);
5619+ set_fs(old_fs);
5620+ if (err < 0) {
5621+ kfree(buf);
5622+ buf = NULL;
5623+ goto out;
5624+ }
5625+ buf[err] = 0;
5626+ nd_set_link(nd, buf);
5627+ err = 0;
5628+
5629+out:
5630+ if (err >= 0) {
5631+ unionfs_check_nd(nd);
5632+ unionfs_check_dentry(dentry);
5633+ }
5634+
5635+ unionfs_unlock_dentry(dentry);
5636+ unionfs_unlock_parent(dentry, parent);
5637+ unionfs_read_unlock(dentry->d_sb);
5638+
5639+ return ERR_PTR(err);
5640+}
5641+
5642+/* this @nd *IS* still used */
5643+static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5644+ void *cookie)
5645+{
5646+ struct dentry *parent;
5647+ char *buf;
5648+
5649+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5650+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5651+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5652+
5653+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false)))
5654+ printk(KERN_ERR
5655+ "unionfs: put_link failed to revalidate dentry\n");
5656+
5657+ unionfs_check_dentry(dentry);
5658+#if 0
5659+ /* XXX: can't run this check b/c this fxn can receive a poisoned 'nd' PTR */
5660+ unionfs_check_nd(nd);
5661+#endif
5662+ buf = nd_get_link(nd);
5663+ if (!IS_ERR(buf))
5664+ kfree(buf);
5665+ unionfs_unlock_dentry(dentry);
5666+ unionfs_unlock_parent(dentry, parent);
5667+ unionfs_read_unlock(dentry->d_sb);
5668+}
5669+
5670+/*
5671+ * This is a variant of fs/namei.c:permission() or inode_permission() which
5672+ * skips over EROFS tests (because we perform copyup on EROFS).
5673+ */
5674+static int __inode_permission(struct inode *inode, int mask, unsigned int flags)
5675+{
5676+ int retval;
5677+
5678+ /* nobody gets write access to an immutable file */
5679+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
5680+ return -EACCES;
5681+
5682+ /* Ordinary permission routines do not understand MAY_APPEND. */
5683+ if (inode->i_op && inode->i_op->permission) {
5684+ retval = inode->i_op->permission(inode, mask, flags);
5685+ if (!retval) {
5686+ /*
5687+ * Exec permission on a regular file is denied if none
5688+ * of the execute bits are set.
5689+ *
5690+ * This check should be done by the ->permission()
5691+ * method.
5692+ */
5693+ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
5694+ !(inode->i_mode & S_IXUGO))
5695+ return -EACCES;
5696+ }
5697+ } else {
5698+ retval = generic_permission(inode, mask, flags, NULL);
5699+ }
5700+ if (retval)
5701+ return retval;
5702+
5703+ return security_inode_permission(inode,
5704+ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
5705+}
5706+
5707+/*
5708+ * Don't grab the superblock read-lock in unionfs_permission, which prevents
5709+ * a deadlock with the branch-management "add branch" code (which grabbed
5710+ * the write lock). It is safe to not grab the read lock here, because even
5711+ * with branch management taking place, there is no chance that
5712+ * unionfs_permission, or anything it calls, will use stale branch
5713+ * information.
5714+ */
5715+static int unionfs_permission(struct inode *inode, int mask, unsigned int flags)
5716+{
5717+ struct inode *lower_inode = NULL;
5718+ int err = 0;
5719+ int bindex, bstart, bend;
5720+ int is_file;
5721+ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
5722+ struct inode *inode_grabbed;
5723+ struct dentry *dentry;
5724+
5725+ if (flags & IPERM_FLAG_RCU) {
5726+ err = -ECHILD;
5727+ goto out_nograb;
5728+ }
5729+
5730+ dentry = d_find_alias(inode);
5731+ if (dentry)
5732+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5733+
5734+ inode_grabbed = igrab(inode);
5735+ is_file = !S_ISDIR(inode->i_mode);
5736+
5737+ if (!UNIONFS_I(inode)->lower_inodes) {
5738+ if (is_file) /* dirs can be unlinked but chdir'ed to */
5739+ err = -ESTALE; /* force revalidate */
5740+ goto out;
5741+ }
5742+ bstart = ibstart(inode);
5743+ bend = ibend(inode);
5744+ if (unlikely(bstart < 0 || bend < 0)) {
5745+ /*
5746+ * With branch-management, we can get a stale inode here.
5747+ * If so, we return ESTALE back to link_path_walk, which
5748+ * would discard the dcache entry and re-lookup the
5749+ * dentry+inode. This should be equivalent to issuing
5750+ * __unionfs_d_revalidate_chain on nd.dentry here.
5751+ */
5752+ if (is_file) /* dirs can be unlinked but chdir'ed to */
5753+ err = -ESTALE; /* force revalidate */
5754+ goto out;
5755+ }
5756+
5757+ for (bindex = bstart; bindex <= bend; bindex++) {
5758+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
5759+ if (!lower_inode)
5760+ continue;
5761+
5762+ /*
5763+ * check the condition for D-F-D underlying files/directories,
5764+ * we don't have to check for files, if we are checking for
5765+ * directories.
5766+ */
5767+ if (!is_file && !S_ISDIR(lower_inode->i_mode))
5768+ continue;
5769+
5770+ /*
5771+ * We check basic permissions, but we ignore any conditions
5772+ * such as readonly file systems or branches marked as
5773+ * readonly, because those conditions should lead to a
5774+ * copyup taking place later on. However, if user never had
5775+ * access to the file, then no copyup could ever take place.
5776+ */
5777+ err = __inode_permission(lower_inode, mask, flags);
5778+ if (err && err != -EACCES && err != EPERM && bindex > 0) {
5779+ umode_t mode = lower_inode->i_mode;
5780+ if ((is_robranch_super(inode->i_sb, bindex) ||
5781+ __is_rdonly(lower_inode)) &&
5782+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5783+ err = 0;
5784+ if (IS_COPYUP_ERR(err))
5785+ err = 0;
5786+ }
5787+
5788+ /*
5789+ * NFS HACK: NFSv2/3 return EACCES on readonly-exported,
5790+ * locally readonly-mounted file systems, instead of EROFS
5791+ * like other file systems do. So we have no choice here
5792+ * but to intercept this and ignore it for NFS branches
5793+ * marked readonly. Specifically, we avoid using NFS's own
5794+ * "broken" ->permission method, and rely on
5795+ * generic_permission() to do basic checking for us.
5796+ */
5797+ if (err && err == -EACCES &&
5798+ is_robranch_super(inode->i_sb, bindex) &&
5799+ lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC)
5800+ err = generic_permission(lower_inode, mask, flags, NULL);
5801+
5802+ /*
5803+ * The permissions are an intersection of the overall directory
5804+ * permissions, so we fail if one fails.
5805+ */
5806+ if (err)
5807+ goto out;
5808+
5809+ /* only the leftmost file matters. */
5810+ if (is_file || write_mask) {
5811+ if (is_file && write_mask) {
5812+ err = get_write_access(lower_inode);
5813+ if (!err)
5814+ put_write_access(lower_inode);
5815+ }
5816+ break;
5817+ }
5818+ }
5819+ /* sync times which may have changed (asynchronously) below */
5820+ unionfs_copy_attr_times(inode);
5821+
5822+out:
5823+ unionfs_check_inode(inode);
5824+ if (dentry) {
5825+ unionfs_unlock_dentry(dentry);
5826+ dput(dentry);
5827+ }
5828+ iput(inode_grabbed);
5829+out_nograb:
5830+ return err;
5831+}
5832+
5833+static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5834+{
5835+ int err = 0;
5836+ struct dentry *lower_dentry;
5837+ struct dentry *parent;
5838+ struct inode *inode;
5839+ struct inode *lower_inode;
5840+ int bstart, bend, bindex;
5841+ loff_t size;
5842+ struct iattr lower_ia;
5843+
5844+ /* check if user has permission to change inode */
5845+ err = inode_change_ok(dentry->d_inode, ia);
5846+ if (err)
5847+ goto out_err;
5848+
5849+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5850+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5851+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5852+
5853+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5854+ err = -ESTALE;
5855+ goto out;
5856+ }
5857+
5858+ bstart = dbstart(dentry);
5859+ bend = dbend(dentry);
5860+ inode = dentry->d_inode;
5861+
5862+ /*
5863+ * mode change is for clearing setuid/setgid. Allow lower filesystem
5864+ * to reinterpret it in its own way.
5865+ */
5866+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
5867+ ia->ia_valid &= ~ATTR_MODE;
5868+
5869+ lower_dentry = unionfs_lower_dentry(dentry);
5870+ if (!lower_dentry) { /* should never happen after above revalidate */
5871+ err = -EINVAL;
5872+ goto out;
5873+ }
5874+
5875+ /*
5876+ * Get the lower inode directly from lower dentry, in case ibstart
5877+ * is -1 (which happens when the file is open but unlinked.
5878+ */
5879+ lower_inode = lower_dentry->d_inode;
5880+
5881+ /* check if user has permission to change lower inode */
5882+ err = inode_change_ok(lower_inode, ia);
5883+ if (err)
5884+ goto out;
5885+
5886+ /* copyup if the file is on a read only branch */
5887+ if (is_robranch_super(dentry->d_sb, bstart)
5888+ || __is_rdonly(lower_inode)) {
5889+ /* check if we have a branch to copy up to */
5890+ if (bstart <= 0) {
5891+ err = -EACCES;
5892+ goto out;
5893+ }
5894+
5895+ if (ia->ia_valid & ATTR_SIZE)
5896+ size = ia->ia_size;
5897+ else
5898+ size = i_size_read(inode);
5899+ /* copyup to next available branch */
5900+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
5901+ err = copyup_dentry(parent->d_inode,
5902+ dentry, bstart, bindex,
5903+ dentry->d_name.name,
5904+ dentry->d_name.len,
5905+ NULL, size);
5906+ if (!err)
5907+ break;
5908+ }
5909+ if (err)
5910+ goto out;
5911+ /* get updated lower_dentry/inode after copyup */
5912+ lower_dentry = unionfs_lower_dentry(dentry);
5913+ lower_inode = unionfs_lower_inode(inode);
5914+ /*
5915+ * check for whiteouts in writeable branch, and remove them
5916+ * if necessary.
5917+ */
5918+ if (lower_dentry) {
5919+ err = check_unlink_whiteout(dentry, lower_dentry,
5920+ bindex);
5921+ if (err > 0) /* ignore if whiteout found and removed */
5922+ err = 0;
5923+ }
5924+ }
5925+
5926+ /*
5927+ * If shrinking, first truncate upper level to cancel writing dirty
5928+ * pages beyond the new eof; and also if its' maxbytes is more
5929+ * limiting (fail with -EFBIG before making any change to the lower
5930+ * level). There is no need to vmtruncate the upper level
5931+ * afterwards in the other cases: we fsstack_copy_inode_size from
5932+ * the lower level.
5933+ */
5934+ if (ia->ia_valid & ATTR_SIZE) {
5935+ size = i_size_read(inode);
5936+ if (ia->ia_size < size || (ia->ia_size > size &&
5937+ inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
5938+ err = vmtruncate(inode, ia->ia_size);
5939+ if (err)
5940+ goto out;
5941+ }
5942+ }
5943+
5944+ /* notify the (possibly copied-up) lower inode */
5945+ /*
5946+ * Note: we use lower_dentry->d_inode, because lower_inode may be
5947+ * unlinked (no inode->i_sb and i_ino==0. This happens if someone
5948+ * tries to open(), unlink(), then ftruncate() a file.
5949+ */
5950+ /* prepare our own lower struct iattr (with our own lower file) */
5951+ memcpy(&lower_ia, ia, sizeof(lower_ia));
5952+ if (ia->ia_valid & ATTR_FILE) {
5953+ lower_ia.ia_file = unionfs_lower_file(ia->ia_file);
5954+ BUG_ON(!lower_ia.ia_file); // XXX?
5955+ }
5956+
5957+ mutex_lock(&lower_dentry->d_inode->i_mutex);
5958+ err = notify_change(lower_dentry, &lower_ia);
5959+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
5960+ if (err)
5961+ goto out;
5962+
5963+ /* get attributes from the first lower inode */
5964+ if (ibstart(inode) >= 0)
5965+ unionfs_copy_attr_all(inode, lower_inode);
5966+ /*
5967+ * unionfs_copy_attr_all will copy the lower times to our inode if
5968+ * the lower ones are newer (useful for cache coherency). However,
5969+ * ->setattr is the only place in which we may have to copy the
5970+ * lower inode times absolutely, to support utimes(2).
5971+ */
5972+ if (ia->ia_valid & ATTR_MTIME_SET)
5973+ inode->i_mtime = lower_inode->i_mtime;
5974+ if (ia->ia_valid & ATTR_CTIME)
5975+ inode->i_ctime = lower_inode->i_ctime;
5976+ if (ia->ia_valid & ATTR_ATIME_SET)
5977+ inode->i_atime = lower_inode->i_atime;
5978+ fsstack_copy_inode_size(inode, lower_inode);
5979+
5980+out:
5981+ if (!err)
5982+ unionfs_check_dentry(dentry);
5983+ unionfs_unlock_dentry(dentry);
5984+ unionfs_unlock_parent(dentry, parent);
5985+ unionfs_read_unlock(dentry->d_sb);
5986+out_err:
5987+ return err;
5988+}
5989+
5990+struct inode_operations unionfs_symlink_iops = {
5991+ .readlink = unionfs_readlink,
5992+ .permission = unionfs_permission,
5993+ .follow_link = unionfs_follow_link,
5994+ .setattr = unionfs_setattr,
5995+ .put_link = unionfs_put_link,
5996+};
5997+
5998+struct inode_operations unionfs_dir_iops = {
5999+ .create = unionfs_create,
6000+ .lookup = unionfs_lookup,
6001+ .link = unionfs_link,
6002+ .unlink = unionfs_unlink,
6003+ .symlink = unionfs_symlink,
6004+ .mkdir = unionfs_mkdir,
6005+ .rmdir = unionfs_rmdir,
6006+ .mknod = unionfs_mknod,
6007+ .rename = unionfs_rename,
6008+ .permission = unionfs_permission,
6009+ .setattr = unionfs_setattr,
6010+#ifdef CONFIG_UNION_FS_XATTR
6011+ .setxattr = unionfs_setxattr,
6012+ .getxattr = unionfs_getxattr,
6013+ .removexattr = unionfs_removexattr,
6014+ .listxattr = unionfs_listxattr,
6015+#endif /* CONFIG_UNION_FS_XATTR */
6016+};
6017+
6018+struct inode_operations unionfs_main_iops = {
6019+ .permission = unionfs_permission,
6020+ .setattr = unionfs_setattr,
6021+#ifdef CONFIG_UNION_FS_XATTR
6022+ .setxattr = unionfs_setxattr,
6023+ .getxattr = unionfs_getxattr,
6024+ .removexattr = unionfs_removexattr,
6025+ .listxattr = unionfs_listxattr,
6026+#endif /* CONFIG_UNION_FS_XATTR */
6027+};
6028diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
6029new file mode 100644
6030index 0000000..3cbde56
6031--- /dev/null
6032+++ b/fs/unionfs/lookup.c
6033@@ -0,0 +1,569 @@
6034+/*
6035+ * Copyright (c) 2003-2011 Erez Zadok
6036+ * Copyright (c) 2003-2006 Charles P. Wright
6037+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6038+ * Copyright (c) 2005-2006 Junjiro Okajima
6039+ * Copyright (c) 2005 Arun M. Krishnakumar
6040+ * Copyright (c) 2004-2006 David P. Quigley
6041+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6042+ * Copyright (c) 2003 Puja Gupta
6043+ * Copyright (c) 2003 Harikesavan Krishnan
6044+ * Copyright (c) 2003-2011 Stony Brook University
6045+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
6046+ *
6047+ * This program is free software; you can redistribute it and/or modify
6048+ * it under the terms of the GNU General Public License version 2 as
6049+ * published by the Free Software Foundation.
6050+ */
6051+
6052+#include "union.h"
6053+
6054+/*
6055+ * Lookup one path component @name relative to a <base,mnt> path pair.
6056+ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
6057+ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
6058+ * other lower mounts properly. If @new_mnt is non-null, will fill in the
6059+ * new mnt there. Caller is responsible to dput/mntput/path_put returned
6060+ * @dentry and @new_mnt.
6061+ */
6062+struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
6063+ const char *name, struct vfsmount **new_mnt)
6064+{
6065+ struct dentry *dentry = NULL;
6066+ struct nameidata lower_nd;
6067+ int err;
6068+
6069+ /* we use flags=0 to get basic lookup */
6070+ err = vfs_path_lookup(base, mnt, name, 0, &lower_nd);
6071+
6072+ switch (err) {
6073+ case 0: /* no error */
6074+ dentry = lower_nd.path.dentry;
6075+ if (new_mnt)
6076+ *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */
6077+ break;
6078+ case -ENOENT:
6079+ /*
6080+ * We don't consider ENOENT an error, and we want to return
6081+ * a negative dentry (ala lookup_one_len). As we know
6082+ * there was no inode for this name before (-ENOENT), then
6083+ * it's safe to call lookup_one_len (which doesn't take a
6084+ * vfsmount).
6085+ */
6086+ dentry = lookup_lck_len(name, base, strlen(name));
6087+ if (new_mnt)
6088+ *new_mnt = mntget(lower_nd.path.mnt);
6089+ break;
6090+ default: /* all other real errors */
6091+ dentry = ERR_PTR(err);
6092+ break;
6093+ }
6094+
6095+ return dentry;
6096+}
6097+
6098+/*
6099+ * This is a utility function that fills in a unionfs dentry.
6100+ * Caller must lock this dentry with unionfs_lock_dentry.
6101+ *
6102+ * Returns: 0 (ok), or -ERRNO if an error occurred.
6103+ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly
6104+ */
6105+int unionfs_partial_lookup(struct dentry *dentry, struct dentry *parent)
6106+{
6107+ struct dentry *tmp;
6108+ int err = -ENOSYS;
6109+
6110+ tmp = unionfs_lookup_full(dentry, parent, INTERPOSE_PARTIAL);
6111+
6112+ if (!tmp) {
6113+ err = 0;
6114+ goto out;
6115+ }
6116+ if (IS_ERR(tmp)) {
6117+ err = PTR_ERR(tmp);
6118+ goto out;
6119+ }
6120+ /* XXX: need to change the interface */
6121+ BUG_ON(tmp != dentry);
6122+out:
6123+ return err;
6124+}
6125+
6126+/* The dentry cache is just so we have properly sized dentries. */
6127+static struct kmem_cache *unionfs_dentry_cachep;
6128+int unionfs_init_dentry_cache(void)
6129+{
6130+ unionfs_dentry_cachep =
6131+ kmem_cache_create("unionfs_dentry",
6132+ sizeof(struct unionfs_dentry_info),
6133+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
6134+
6135+ return (unionfs_dentry_cachep ? 0 : -ENOMEM);
6136+}
6137+
6138+void unionfs_destroy_dentry_cache(void)
6139+{
6140+ if (unionfs_dentry_cachep)
6141+ kmem_cache_destroy(unionfs_dentry_cachep);
6142+}
6143+
6144+void free_dentry_private_data(struct dentry *dentry)
6145+{
6146+ if (!dentry || !dentry->d_fsdata)
6147+ return;
6148+ kfree(UNIONFS_D(dentry)->lower_paths);
6149+ UNIONFS_D(dentry)->lower_paths = NULL;
6150+ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6151+ dentry->d_fsdata = NULL;
6152+}
6153+
6154+static inline int __realloc_dentry_private_data(struct dentry *dentry)
6155+{
6156+ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6157+ void *p;
6158+ int size;
6159+
6160+ BUG_ON(!info);
6161+
6162+ size = sizeof(struct path) * sbmax(dentry->d_sb);
6163+ p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6164+ if (unlikely(!p))
6165+ return -ENOMEM;
6166+
6167+ info->lower_paths = p;
6168+
6169+ info->bstart = -1;
6170+ info->bend = -1;
6171+ info->bopaque = -1;
6172+ info->bcount = sbmax(dentry->d_sb);
6173+ atomic_set(&info->generation,
6174+ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6175+
6176+ memset(info->lower_paths, 0, size);
6177+
6178+ return 0;
6179+}
6180+
6181+/* UNIONFS_D(dentry)->lock must be locked */
6182+int realloc_dentry_private_data(struct dentry *dentry)
6183+{
6184+ if (!__realloc_dentry_private_data(dentry))
6185+ return 0;
6186+
6187+ kfree(UNIONFS_D(dentry)->lower_paths);
6188+ free_dentry_private_data(dentry);
6189+ return -ENOMEM;
6190+}
6191+
6192+/* allocate new dentry private data */
6193+int new_dentry_private_data(struct dentry *dentry, int subclass)
6194+{
6195+ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6196+
6197+ BUG_ON(info);
6198+
6199+ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6200+ if (unlikely(!info))
6201+ return -ENOMEM;
6202+
6203+ mutex_init(&info->lock);
6204+ mutex_lock_nested(&info->lock, subclass);
6205+
6206+ info->lower_paths = NULL;
6207+
6208+ dentry->d_fsdata = info;
6209+
6210+ if (!__realloc_dentry_private_data(dentry))
6211+ return 0;
6212+
6213+ mutex_unlock(&info->lock);
6214+ free_dentry_private_data(dentry);
6215+ return -ENOMEM;
6216+}
6217+
6218+/*
6219+ * scan through the lower dentry objects, and set bstart to reflect the
6220+ * starting branch
6221+ */
6222+void update_bstart(struct dentry *dentry)
6223+{
6224+ int bindex;
6225+ int bstart = dbstart(dentry);
6226+ int bend = dbend(dentry);
6227+ struct dentry *lower_dentry;
6228+
6229+ for (bindex = bstart; bindex <= bend; bindex++) {
6230+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6231+ if (!lower_dentry)
6232+ continue;
6233+ if (lower_dentry->d_inode) {
6234+ dbstart(dentry) = bindex;
6235+ break;
6236+ }
6237+ dput(lower_dentry);
6238+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6239+ }
6240+}
6241+
6242+
6243+/*
6244+ * Initialize a nameidata structure (the intent part) we can pass to a lower
6245+ * file system. Returns 0 on success or -error (only -ENOMEM possible).
6246+ * Inside that nd structure, this function may also return an allocated
6247+ * struct file (for open intents). The caller, when done with this nd, must
6248+ * kfree the intent file (using release_lower_nd).
6249+ *
6250+ * XXX: this code, and the callers of this code, should be redone using
6251+ * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6252+ * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6253+ * function and a method that other file systems can call.
6254+ */
6255+int init_lower_nd(struct nameidata *nd, unsigned int flags)
6256+{
6257+ int err = 0;
6258+#ifdef ALLOC_LOWER_ND_FILE
6259+ /*
6260+ * XXX: one day we may need to have the lower return an open file
6261+ * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6262+ * very well be needed for nfs4.
6263+ */
6264+ struct file *file;
6265+#endif /* ALLOC_LOWER_ND_FILE */
6266+
6267+ memset(nd, 0, sizeof(struct nameidata));
6268+ if (!flags)
6269+ return err;
6270+
6271+ switch (flags) {
6272+ case LOOKUP_CREATE:
6273+ nd->intent.open.flags |= O_CREAT;
6274+ /* fall through: shared code for create/open cases */
6275+ case LOOKUP_OPEN:
6276+ nd->flags = flags;
6277+ nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6278+#ifdef ALLOC_LOWER_ND_FILE
6279+ file = kzalloc(sizeof(struct file), GFP_KERNEL);
6280+ if (unlikely(!file)) {
6281+ err = -ENOMEM;
6282+ break; /* exit switch statement and thus return */
6283+ }
6284+ nd->intent.open.file = file;
6285+#endif /* ALLOC_LOWER_ND_FILE */
6286+ break;
6287+ default:
6288+ /*
6289+ * We should never get here, for now.
6290+ * We can add new cases here later on.
6291+ */
6292+ pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6293+ BUG();
6294+ break;
6295+ }
6296+
6297+ return err;
6298+}
6299+
6300+void release_lower_nd(struct nameidata *nd, int err)
6301+{
6302+ if (!nd->intent.open.file)
6303+ return;
6304+ else if (!err)
6305+ release_open_intent(nd);
6306+#ifdef ALLOC_LOWER_ND_FILE
6307+ kfree(nd->intent.open.file);
6308+#endif /* ALLOC_LOWER_ND_FILE */
6309+}
6310+
6311+/*
6312+ * Main (and complex) driver function for Unionfs's lookup: scans the
6313+ * parent's branches for @dentry's name and records the lower dentries/mnts.
6314+ * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
6315+ * PTR if d_splice returned a different dentry.
6316+ *
6317+ * In every lookupmode both @dentry and @parent must already be locked (see
6318+ * the verify_locked() calls below). For INTERPOSE_LOOKUP (i.e., a newly
6319+ * looked-up dentry) that lock is the one taken by new_dentry_private_data,
6320+ * and the caller must unlock the dentry's info when done.
6321+ */
6322+struct dentry *unionfs_lookup_full(struct dentry *dentry,
6323+ struct dentry *parent, int lookupmode)
6324+{
6325+ int err = 0;
6326+ struct dentry *lower_dentry = NULL;
6327+ struct vfsmount *lower_mnt;
6328+ struct vfsmount *lower_dir_mnt;
6329+ struct dentry *wh_lower_dentry = NULL;
6330+ struct dentry *lower_dir_dentry = NULL;
6331+ struct dentry *d_interposed = NULL;
6332+ int bindex, bstart, bend, bopaque;
6333+ int opaque, num_positive = 0;
6334+ const char *name;
6335+ int namelen;
6336+ int pos_start, pos_end;
6337+
6338+ /*
6339+ * We should already have a lock on this dentry in the case of a
6340+ * partial lookup, or a revalidation. Otherwise it is returned from
6341+ * new_dentry_private_data already locked.
6342+ */
6343+ verify_locked(dentry);
6344+ verify_locked(parent);
6345+
6346+ /* must initialize dentry operations */
6347+ dentry->d_op = &unionfs_dops;
6348+
6349+ /* We never partial lookup the root directory. */
6350+ if (IS_ROOT(dentry))
6351+ goto out;
6352+
6353+ name = dentry->d_name.name;
6354+ namelen = dentry->d_name.len;
6355+
6356+ /* No dentries should get created for possible whiteout names. */
6357+ if (!is_validname(name)) {
6358+ err = -EPERM;
6359+ goto out_free;
6360+ }
6361+
6362+ /* Now start the actual lookup procedure. */
6363+ bstart = dbstart(parent);
6364+ bend = dbend(parent);
6365+ bopaque = dbopaque(parent);
6366+ BUG_ON(bstart < 0);
6367+
6368+ /* adjust bend to bopaque if needed */
6369+ if ((bopaque >= 0) && (bopaque < bend))
6370+ bend = bopaque;
6371+
6372+ /* lookup all possible dentries */
6373+ for (bindex = bstart; bindex <= bend; bindex++) {
6374+
6375+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6376+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
6377+
6378+ /* skip if we already have a lower dentry (positive or not) */
6379+ if (lower_dentry) {
6380+ if (dbstart(dentry) < 0)
6381+ dbstart(dentry) = bindex;
6382+ if (bindex > dbend(dentry))
6383+ dbend(dentry) = bindex;
6384+ if (lower_dentry->d_inode)
6385+ num_positive++;
6386+ continue;
6387+ }
6388+
6389+ lower_dir_dentry =
6390+ unionfs_lower_dentry_idx(parent, bindex);
6391+ /* if the lower dentry's parent does not exist, skip this */
6392+ if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6393+ continue;
6394+
6395+ /* also skip it if the parent isn't a directory. */
6396+ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6397+ continue; /* XXX: should be BUG_ON */
6398+
6399+ /* check for whiteouts: stop lookup if found */
6400+ wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
6401+ if (IS_ERR(wh_lower_dentry)) {
6402+ err = PTR_ERR(wh_lower_dentry);
6403+ goto out_free;
6404+ }
6405+ if (wh_lower_dentry->d_inode) {
6406+ dbend(dentry) = dbopaque(dentry) = bindex;
6407+ if (dbstart(dentry) < 0)
6408+ dbstart(dentry) = bindex;
6409+ dput(wh_lower_dentry);
6410+ break;
6411+ }
6412+ dput(wh_lower_dentry);
6413+
6414+ /* Now do regular lookup; lookup @name */
6415+ lower_dir_mnt = unionfs_lower_mnt_idx(parent, bindex);
6416+ lower_mnt = NULL; /* XXX: needed? */
6417+
6418+ lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
6419+ name, &lower_mnt);
6420+
6421+ if (IS_ERR(lower_dentry)) {
6422+ err = PTR_ERR(lower_dentry);
6423+ goto out_free;
6424+ }
6425+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6426+ if (!lower_mnt)
6427+ lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6428+ bindex);
6429+ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6430+
6431+ /* adjust dbstart/end */
6432+ if (dbstart(dentry) < 0)
6433+ dbstart(dentry) = bindex;
6434+ if (bindex > dbend(dentry))
6435+ dbend(dentry) = bindex;
6436+ /*
6437+ * We always store the lower dentries above, and update
6438+ * dbstart/dbend, even if the whole unionfs dentry is
6439+ * negative (i.e., no lower inodes).
6440+ */
6441+ if (!lower_dentry->d_inode)
6442+ continue;
6443+ num_positive++;
6444+
6445+ /*
6446+ * check if we just found an opaque directory, if so, stop
6447+ * lookups here.
6448+ */
6449+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
6450+ continue;
6451+ opaque = is_opaque_dir(dentry, bindex);
6452+ if (opaque < 0) {
6453+ err = opaque;
6454+ goto out_free;
6455+ } else if (opaque) {
6456+ dbend(dentry) = dbopaque(dentry) = bindex;
6457+ break;
6458+ }
6459+ dbend(dentry) = bindex;
6460+
6461+ /* update parent's atime from this branch's lower directory */
6462+ fsstack_copy_attr_atime(parent->d_inode,
6463+ lower_dir_dentry->d_inode);
6464+ }
6465+
6466+ /* sanity checks, then decide if to process a negative dentry */
6467+ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6468+ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6469+
6470+ if (num_positive > 0)
6471+ goto out_positive;
6472+
6473+ /*** handle NEGATIVE dentries ***/
6474+
6475+ /*
6476+ * If negative, keep only first lower negative dentry, to save on
6477+ * memory.
6478+ */
6479+ if (dbstart(dentry) < dbend(dentry)) {
6480+ path_put_lowers(dentry, dbstart(dentry) + 1,
6481+ dbend(dentry), false);
6482+ dbend(dentry) = dbstart(dentry);
6483+ }
6484+ if (lookupmode == INTERPOSE_PARTIAL)
6485+ goto out;
6486+ if (lookupmode == INTERPOSE_LOOKUP) {
6487+ /*
6488+ * If all we found was a whiteout in the first available
6489+ * branch, then create a negative dentry for a possibly new
6490+ * file to be created.
6491+ */
6492+ if (dbopaque(dentry) < 0)
6493+ goto out;
6494+ /* XXX: need to get mnt here */
6495+ bindex = dbstart(dentry);
6496+ if (unionfs_lower_dentry_idx(dentry, bindex))
6497+ goto out;
6498+ lower_dir_dentry =
6499+ unionfs_lower_dentry_idx(parent, bindex);
6500+ if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6501+ goto out;
6502+ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6503+ goto out; /* XXX: should be BUG_ON */
6504+ /* XXX: do we need to cross bind mounts here? */
6505+ lower_dentry = lookup_lck_len(name, lower_dir_dentry, namelen);
6506+ if (IS_ERR(lower_dentry)) {
6507+ err = PTR_ERR(lower_dentry);
6508+ goto out;
6509+ }
6510+ /* XXX: need to mntget/mntput as needed too! */
6511+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6512+ /* XXX: wrong mnt for crossing bind mounts! */
6513+ lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
6514+ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6515+
6516+ goto out;
6517+ }
6518+
6519+ /* if we're revalidating a positive dentry, don't make it negative */
6520+ if (lookupmode != INTERPOSE_REVAL)
6521+ d_add(dentry, NULL);
6522+
6523+ goto out;
6524+
6525+out_positive:
6526+ /*** handle POSITIVE dentries ***/
6527+
6528+ /*
6529+ * This unionfs dentry is positive (at least one lower inode
6530+ * exists), so scan entire dentry from beginning to end, and remove
6531+ * any negative lower dentries, if any. Then, update dbstart/dbend
6532+ * to reflect the start/end of positive dentries.
6533+ */
6534+ pos_start = pos_end = -1;
6535+ for (bindex = bstart; bindex <= bend; bindex++) {
6536+ lower_dentry = unionfs_lower_dentry_idx(dentry,
6537+ bindex);
6538+ if (lower_dentry && lower_dentry->d_inode) {
6539+ if (pos_start < 0)
6540+ pos_start = bindex;
6541+ if (bindex > pos_end)
6542+ pos_end = bindex;
6543+ continue;
6544+ }
6545+ path_put_lowers(dentry, bindex, bindex, false);
6546+ }
6547+ if (pos_start >= 0)
6548+ dbstart(dentry) = pos_start;
6549+ if (pos_end >= 0)
6550+ dbend(dentry) = pos_end;
6551+
6552+ /* Partial lookups need to re-interpose, or throw away older negs. */
6553+ if (lookupmode == INTERPOSE_PARTIAL) {
6554+ if (dentry->d_inode) {
6555+ unionfs_reinterpose(dentry);
6556+ goto out;
6557+ }
6558+
6559+ /*
6560+ * Our dentry has no inode yet although positive lower dentries
6561+ * were found, so handle it as a negative revalidation.
6562+ */
6563+ lookupmode = INTERPOSE_REVAL_NEG;
6564+ update_bstart(dentry);
6565+ }
6566+
6567+ /*
6568+ * Interpose can return a dentry if d_splice returned a different
6569+ * dentry.
6570+ */
6571+ d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6572+ if (IS_ERR(d_interposed))
6573+ err = PTR_ERR(d_interposed);
6574+ else if (d_interposed)
6575+ dentry = d_interposed;
6576+
6577+ if (!err)
6578+ goto out;
6579+ d_drop(dentry);
6580+
6581+out_free:
6582+ /* should dput/mntput all the underlying dentries on error condition */
6583+ if (dbstart(dentry) >= 0)
6584+ path_put_lowers_all(dentry, false);
6585+ /* free lower_paths unconditionally */
6586+ kfree(UNIONFS_D(dentry)->lower_paths);
6587+ UNIONFS_D(dentry)->lower_paths = NULL;
6588+
6589+out:
6590+ if (dentry && UNIONFS_D(dentry)) {
6591+ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6592+ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6593+ }
6594+ if (d_interposed && UNIONFS_D(d_interposed)) {
6595+ BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
6596+ BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
6597+ }
6598+
6599+ if (!err && d_interposed)
6600+ return d_interposed;
6601+ return ERR_PTR(err);
6602+}
6603diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6604new file mode 100644
6605index 0000000..fa52f61
6606--- /dev/null
6607+++ b/fs/unionfs/main.c
6608@@ -0,0 +1,763 @@
6609+/*
6610+ * Copyright (c) 2003-2011 Erez Zadok
6611+ * Copyright (c) 2003-2006 Charles P. Wright
6612+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6613+ * Copyright (c) 2005-2006 Junjiro Okajima
6614+ * Copyright (c) 2005 Arun M. Krishnakumar
6615+ * Copyright (c) 2004-2006 David P. Quigley
6616+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6617+ * Copyright (c) 2003 Puja Gupta
6618+ * Copyright (c) 2003 Harikesavan Krishnan
6619+ * Copyright (c) 2003-2011 Stony Brook University
6620+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
6621+ *
6622+ * This program is free software; you can redistribute it and/or modify
6623+ * it under the terms of the GNU General Public License version 2 as
6624+ * published by the Free Software Foundation.
6625+ */
6626+
6627+#include "union.h"
6628+#include <linux/module.h>
6629+#include <linux/moduleparam.h>
6630+
6631+static void unionfs_fill_inode(struct dentry *dentry,
6632+ struct inode *inode)
6633+{
6634+ struct inode *lower_inode;
6635+ struct dentry *lower_dentry;
6636+ int bindex, bstart, bend;
6637+
6638+ bstart = dbstart(dentry);
6639+ bend = dbend(dentry);
6640+
6641+ for (bindex = bstart; bindex <= bend; bindex++) {
6642+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6643+ if (!lower_dentry) {
6644+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
6645+ continue;
6646+ }
6647+
6648+ /* Initialize the lower inode to the new lower inode. */
6649+ if (!lower_dentry->d_inode)
6650+ continue;
6651+
6652+ unionfs_set_lower_inode_idx(inode, bindex,
6653+ igrab(lower_dentry->d_inode));
6654+ }
6655+
6656+ ibstart(inode) = dbstart(dentry);
6657+ ibend(inode) = dbend(dentry);
6658+
6659+ /* Use attributes from the first branch. */
6660+ lower_inode = unionfs_lower_inode(inode);
6661+
6662+ /* Use different set of inode ops for symlinks & directories */
6663+ if (S_ISLNK(lower_inode->i_mode))
6664+ inode->i_op = &unionfs_symlink_iops;
6665+ else if (S_ISDIR(lower_inode->i_mode))
6666+ inode->i_op = &unionfs_dir_iops;
6667+
6668+ /* Use different set of file ops for directories */
6669+ if (S_ISDIR(lower_inode->i_mode))
6670+ inode->i_fop = &unionfs_dir_fops;
6671+
6672+ /* properly initialize special inodes */
6673+ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6674+ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6675+ init_special_inode(inode, lower_inode->i_mode,
6676+ lower_inode->i_rdev);
6677+
6678+ /* all well, copy inode attributes */
6679+ unionfs_copy_attr_all(inode, lower_inode);
6680+ fsstack_copy_inode_size(inode, lower_inode);
6681+}
6682+
6683+/*
6684+ * Connect a unionfs inode dentry/inode with several lower ones. This is
6685+ * the classic stackable file system "vnode interposition" action.
6686+ *
6687+ * @sb: unionfs's super_block
6688+ */
6689+struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6690+ int flag)
6691+{
6692+ int err = 0;
6693+ struct inode *inode;
6694+ int need_fill_inode = 1;
6695+ struct dentry *spliced = NULL;
6696+
6697+ verify_locked(dentry);
6698+
6699+ /*
6700+ * We allocate our new inode below by calling unionfs_iget,
6701+ * which will initialize some of the new inode's fields
6702+ */
6703+
6704+ /*
6705+ * On revalidate we've already got our own inode and just need
6706+ * to fix it up.
6707+ */
6708+ if (flag == INTERPOSE_REVAL) {
6709+ inode = dentry->d_inode;
6710+ UNIONFS_I(inode)->bstart = -1;
6711+ UNIONFS_I(inode)->bend = -1;
6712+ atomic_set(&UNIONFS_I(inode)->generation,
6713+ atomic_read(&UNIONFS_SB(sb)->generation));
6714+
6715+ UNIONFS_I(inode)->lower_inodes =
6716+ kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6717+ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
6718+ err = -ENOMEM;
6719+ goto out;
6720+ }
6721+ } else {
6722+ /* get unique inode number for unionfs */
6723+ inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6724+ if (IS_ERR(inode)) {
6725+ err = PTR_ERR(inode);
6726+ goto out;
6727+ }
6728+ if (atomic_read(&inode->i_count) > 1)
6729+ goto skip;
6730+ }
6731+
6732+ need_fill_inode = 0;
6733+ unionfs_fill_inode(dentry, inode);
6734+
6735+skip:
6736+ /* only (our) lookup wants to do a d_add */
6737+ switch (flag) {
6738+ case INTERPOSE_DEFAULT:
6739+ /* for operations which create new inodes */
6740+ d_add(dentry, inode);
6741+ break;
6742+ case INTERPOSE_REVAL_NEG:
6743+ d_instantiate(dentry, inode);
6744+ break;
6745+ case INTERPOSE_LOOKUP:
6746+ spliced = d_splice_alias(inode, dentry);
6747+ if (spliced && spliced != dentry) {
6748+ /*
6749+ * d_splice can return a dentry if it was
6750+ * disconnected and had to be moved. We must ensure
6751+ * that the private data of the new dentry is
6752+ * correct and that the inode info was filled
6753+ * properly. Finally we must return this new
6754+ * dentry.
6755+ */
6756+ spliced->d_op = &unionfs_dops;
6757+ spliced->d_fsdata = dentry->d_fsdata;
6758+ dentry->d_fsdata = NULL;
6759+ dentry = spliced;
6760+ if (need_fill_inode) {
6761+ need_fill_inode = 0;
6762+ unionfs_fill_inode(dentry, inode);
6763+ }
6764+ goto out_spliced;
6765+ } else if (!spliced) {
6766+ if (need_fill_inode) {
6767+ need_fill_inode = 0;
6768+ unionfs_fill_inode(dentry, inode);
6769+ goto out_spliced;
6770+ }
6771+ }
6772+ break;
6773+ case INTERPOSE_REVAL:
6774+ /* Do nothing. */
6775+ break;
6776+ default:
6777+ printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
6778+ BUG();
6779+ }
6780+ goto out;
6781+
6782+out_spliced:
6783+ if (!err)
6784+ return spliced;
6785+out:
6786+ return ERR_PTR(err);
6787+}
6788+
6789+/* like interpose above, but for an already existing dentry */
6790+void unionfs_reinterpose(struct dentry *dentry)
6791+{
6792+ struct dentry *lower_dentry;
6793+ struct inode *inode;
6794+ int bindex, bstart, bend;
6795+
6796+ verify_locked(dentry);
6797+
6798+ /* This is pre-allocated inode */
6799+ inode = dentry->d_inode;
6800+
6801+ bstart = dbstart(dentry);
6802+ bend = dbend(dentry);
6803+ for (bindex = bstart; bindex <= bend; bindex++) {
6804+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6805+ if (!lower_dentry)
6806+ continue;
6807+
6808+ if (!lower_dentry->d_inode)
6809+ continue;
6810+ if (unionfs_lower_inode_idx(inode, bindex))
6811+ continue;
6812+ unionfs_set_lower_inode_idx(inode, bindex,
6813+ igrab(lower_dentry->d_inode));
6814+ }
6815+ ibstart(inode) = dbstart(dentry);
6816+ ibend(inode) = dbend(dentry);
6817+}
6818+
6819+/*
6820+ * make sure the branch we just looked up (nd) makes sense:
6821+ *
6822+ * 1) we're not trying to stack unionfs on top of unionfs
6823+ * 2) it exists
6824+ * 3) is a directory
6825+ */
6826+int check_branch(const struct path *path)
6827+{
6828+ /* XXX: remove in ODF code -- stacking unions allowed there */
6829+ if (!strcmp(path->dentry->d_sb->s_type->name, UNIONFS_NAME))
6830+ return -EINVAL;
6831+ if (!path->dentry->d_inode)
6832+ return -ENOENT;
6833+ if (!S_ISDIR(path->dentry->d_inode->i_mode))
6834+ return -ENOTDIR;
6835+ return 0;
6836+}
6837+
6838+/* checks if two lower_dentries have overlapping branches */
6839+static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6840+{
6841+ struct dentry *dent = NULL;
6842+
6843+ dent = dent1;
6844+ while ((dent != dent2) && (dent->d_parent != dent))
6845+ dent = dent->d_parent;
6846+
6847+ if (dent == dent2)
6848+ return 1;
6849+
6850+ dent = dent2;
6851+ while ((dent != dent1) && (dent->d_parent != dent))
6852+ dent = dent->d_parent;
6853+
6854+ return (dent == dent1);
6855+}
6856+
6857+/*
6858+ * Parse "ro" or "rw" options, but default to "rw" if no mode options was
6859+ * specified. Fill the mode bits in @perms. If encounter an unknown
6860+ * string, return -EINVAL. Otherwise return 0.
6861+ */
6862+int parse_branch_mode(const char *name, int *perms)
6863+{
6864+ if (!name || !strcmp(name, "rw")) {
6865+ *perms = MAY_READ | MAY_WRITE;
6866+ return 0;
6867+ }
6868+ if (!strcmp(name, "ro")) {
6869+ *perms = MAY_READ;
6870+ return 0;
6871+ }
6872+ return -EINVAL;
6873+}
6874+
6875+/*
6876+ * parse the dirs= mount argument
6877+ *
6878+ * We don't need to lock the superblock private data's rwsem, as we get
6879+ * called only by unionfs_read_super - it is still a long time before anyone
6880+ * can even get a reference to us.
6881+ */
6882+static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6883+ *lower_root_info, char *options)
6884+{
6885+ struct path path;
6886+ char *name;
6887+ int err = 0;
6888+ int branches = 1;
6889+ int bindex = 0;
6890+ int i = 0;
6891+ int j = 0;
6892+ struct dentry *dent1;
6893+ struct dentry *dent2;
6894+
6895+ if (options[0] == '\0') {
6896+ printk(KERN_ERR "unionfs: no branches specified\n");
6897+ err = -EINVAL;
6898+ goto out_return;
6899+ }
6900+
6901+ /*
6902+ * Each colon means we have a separator, this is really just a rough
6903+ * guess, since strsep will handle empty fields for us.
6904+ */
6905+ for (i = 0; options[i]; i++)
6906+ if (options[i] == ':')
6907+ branches++;
6908+
6909+ /* allocate space for underlying pointers to lower dentry */
6910+ UNIONFS_SB(sb)->data =
6911+ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6912+ if (unlikely(!UNIONFS_SB(sb)->data)) {
6913+ err = -ENOMEM;
6914+ goto out_return;
6915+ }
6916+
6917+ lower_root_info->lower_paths =
6918+ kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6919+ if (unlikely(!lower_root_info->lower_paths)) {
6920+ err = -ENOMEM;
6921+ /* free the underlying pointer array */
6922+ kfree(UNIONFS_SB(sb)->data);
6923+ UNIONFS_SB(sb)->data = NULL;
6924+ goto out_return;
6925+ }
6926+
6927+ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6928+ branches = 0;
6929+ while ((name = strsep(&options, ":")) != NULL) {
6930+ int perms;
6931+ char *mode = strchr(name, '=');
6932+
6933+ if (!name)
6934+ continue;
6935+ if (!*name) { /* bad use of ':' (extra colons) */
6936+ err = -EINVAL;
6937+ goto out;
6938+ }
6939+
6940+ branches++;
6941+
6942+ /* strip off '=' if any */
6943+ if (mode)
6944+ *mode++ = '\0';
6945+
6946+ err = parse_branch_mode(mode, &perms);
6947+ if (err) {
6948+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6949+ "branch %d\n", mode, bindex);
6950+ goto out;
6951+ }
6952+ /* ensure that leftmost branch is writeable */
6953+ if (!bindex && !(perms & MAY_WRITE)) {
6954+ printk(KERN_ERR "unionfs: leftmost branch cannot be "
6955+ "read-only (use \"-o ro\" to create a "
6956+ "read-only union)\n");
6957+ err = -EINVAL;
6958+ goto out;
6959+ }
6960+
6961+ err = kern_path(name, LOOKUP_FOLLOW, &path);
6962+ if (err) {
6963+ printk(KERN_ERR "unionfs: error accessing "
6964+ "lower directory '%s' (error %d)\n",
6965+ name, err);
6966+ goto out;
6967+ }
6968+
6969+ err = check_branch(&path);
6970+ if (err) {
6971+ printk(KERN_ERR "unionfs: lower directory "
6972+ "'%s' is not a valid branch\n", name);
6973+ path_put(&path);
6974+ goto out;
6975+ }
6976+
6977+ lower_root_info->lower_paths[bindex].dentry = path.dentry;
6978+ lower_root_info->lower_paths[bindex].mnt = path.mnt;
6979+
6980+ set_branchperms(sb, bindex, perms);
6981+ set_branch_count(sb, bindex, 0);
6982+ new_branch_id(sb, bindex);
6983+
6984+ if (lower_root_info->bstart < 0)
6985+ lower_root_info->bstart = bindex;
6986+ lower_root_info->bend = bindex;
6987+ bindex++;
6988+ }
6989+
6990+ if (branches == 0) {
6991+ printk(KERN_ERR "unionfs: no branches specified\n");
6992+ err = -EINVAL;
6993+ goto out;
6994+ }
6995+
6996+ BUG_ON(branches != (lower_root_info->bend + 1));
6997+
6998+ /*
6999+ * Ensure that no overlaps exist in the branches.
7000+ *
7001+ * This test is required because the Linux kernel has no support
7002+ * currently for ensuring coherency between stackable layers and
7003+ * branches. If we were to allow overlapping branches, it would be
7004+ * possible, for example, to delete a file via one branch, which
7005+ * would not be reflected in another branch. Such incoherency could
7006+ * lead to inconsistencies and even kernel oopses. Rather than
7007+ * implement hacks to work around some of these cache-coherency
7008+ * problems, we prevent branch overlapping, for now. A complete
7009+ * solution will involve proper kernel/VFS support for cache
7010+ * coherency, at which time we could safely remove this
7011+ * branch-overlapping test.
7012+ */
7013+ for (i = 0; i < branches; i++) {
7014+ dent1 = lower_root_info->lower_paths[i].dentry;
7015+ for (j = i + 1; j < branches; j++) {
7016+ dent2 = lower_root_info->lower_paths[j].dentry;
7017+ if (is_branch_overlap(dent1, dent2)) {
7018+ printk(KERN_ERR "unionfs: branches %d and "
7019+ "%d overlap\n", i, j);
7020+ err = -EINVAL;
7021+ goto out;
7022+ }
7023+ }
7024+ }
7025+
7026+out:
7027+ if (err) {
7028+ for (i = 0; i < branches; i++)
7029+ path_put(&lower_root_info->lower_paths[i]);
7030+
7031+ kfree(lower_root_info->lower_paths);
7032+ kfree(UNIONFS_SB(sb)->data);
7033+
7034+ /*
7035+ * MUST clear the pointers to prevent potential double free if
7036+ * the caller dies later on
7037+ */
7038+ lower_root_info->lower_paths = NULL;
7039+ UNIONFS_SB(sb)->data = NULL;
7040+ }
7041+out_return:
7042+ return err;
7043+}
7044+
7045+/*
7046+ * Parse mount options. See the manual page for usage instructions.
7047+ *
7048+ * Returns the dentry object of the lower-level (lower) directory;
7049+ * We want to mount our stackable file system on top of that lower directory.
7050+ */
7051+static struct unionfs_dentry_info *unionfs_parse_options(
7052+ struct super_block *sb,
7053+ char *options)
7054+{
7055+ struct unionfs_dentry_info *lower_root_info;
7056+ char *optname;
7057+ int err = 0;
7058+ int bindex;
7059+ int dirsfound = 0;
7060+
7061+ /* allocate private data area */
7062+ err = -ENOMEM;
7063+ lower_root_info =
7064+ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
7065+ if (unlikely(!lower_root_info))
7066+ goto out_error;
7067+ lower_root_info->bstart = -1;
7068+ lower_root_info->bend = -1;
7069+ lower_root_info->bopaque = -1;
7070+
7071+ while ((optname = strsep(&options, ",")) != NULL) {
7072+ char *optarg;
7073+
7074+ if (!optname || !*optname)
7075+ continue;
7076+
7077+ optarg = strchr(optname, '=');
7078+ if (optarg)
7079+ *optarg++ = '\0';
7080+
7081+ /*
7082+ * All of our options take an argument now. Insert ones that
7083+ * don't, above this check.
7084+ */
7085+ if (!optarg) {
7086+ printk(KERN_ERR "unionfs: %s requires an argument\n",
7087+ optname);
7088+ err = -EINVAL;
7089+ goto out_error;
7090+ }
7091+
7092+ if (!strcmp("dirs", optname)) {
7093+ if (++dirsfound > 1) {
7094+ printk(KERN_ERR
7095+ "unionfs: multiple dirs specified\n");
7096+ err = -EINVAL;
7097+ goto out_error;
7098+ }
7099+ err = parse_dirs_option(sb, lower_root_info, optarg);
7100+ if (err)
7101+ goto out_error;
7102+ continue;
7103+ }
7104+
7105+ err = -EINVAL;
7106+ printk(KERN_ERR
7107+ "unionfs: unrecognized option '%s'\n", optname);
7108+ goto out_error;
7109+ }
7110+ if (dirsfound != 1) {
7111+ printk(KERN_ERR "unionfs: dirs option required\n");
7112+ err = -EINVAL;
7113+ goto out_error;
7114+ }
7115+ goto out;
7116+
7117+out_error:
7118+ if (lower_root_info && lower_root_info->lower_paths) {
7119+ for (bindex = lower_root_info->bstart;
7120+ bindex >= 0 && bindex <= lower_root_info->bend;
7121+ bindex++)
7122+ path_put(&lower_root_info->lower_paths[bindex]);
7123+ }
7124+
7125+ kfree(lower_root_info->lower_paths);
7126+ kfree(lower_root_info);
7127+
7128+ kfree(UNIONFS_SB(sb)->data);
7129+ UNIONFS_SB(sb)->data = NULL;
7130+
7131+ lower_root_info = ERR_PTR(err);
7132+out:
7133+ return lower_root_info;
7134+}
7135+
7136+/*
7137+ * our custom d_alloc_root work-alike
7138+ *
7139+ * we can't use d_alloc_root if we want to use our own interpose function
7140+ * unchanged, so we simply call our own "fake" d_alloc_root
7141+ */
7142+static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
7143+{
7144+ struct dentry *ret = NULL;
7145+
7146+ if (sb) {
7147+ static const struct qstr name = {
7148+ .name = "/",
7149+ .len = 1
7150+ };
7151+
7152+ ret = d_alloc(NULL, &name);
7153+ if (likely(ret)) {
7154+ ret->d_op = &unionfs_dops;
7155+ ret->d_sb = sb;
7156+ ret->d_parent = ret;
7157+ }
7158+ }
7159+ return ret;
7160+}
7161+
7162+/*
7163+ * There is no need to lock the unionfs_super_info's rwsem as there is no
7164+ * way anyone can have a reference to the superblock at this point in time.
7165+ */
7166+static int unionfs_read_super(struct super_block *sb, void *raw_data,
7167+ int silent)
7168+{
7169+ int err = 0;
7170+ struct unionfs_dentry_info *lower_root_info = NULL;
7171+ int bindex, bstart, bend;
7172+
7173+ if (!raw_data) {
7174+ printk(KERN_ERR
7175+ "unionfs: read_super: missing data argument\n");
7176+ err = -EINVAL;
7177+ goto out;
7178+ }
7179+
7180+ /* Allocate superblock private data */
7181+ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
7182+ if (unlikely(!UNIONFS_SB(sb))) {
7183+ printk(KERN_CRIT "unionfs: read_super: out of memory\n");
7184+ err = -ENOMEM;
7185+ goto out;
7186+ }
7187+
7188+ UNIONFS_SB(sb)->bend = -1;
7189+ atomic_set(&UNIONFS_SB(sb)->generation, 1);
7190+ init_rwsem(&UNIONFS_SB(sb)->rwsem);
7191+ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7192+
7193+ lower_root_info = unionfs_parse_options(sb, raw_data);
7194+ if (IS_ERR(lower_root_info)) {
7195+ printk(KERN_ERR
7196+ "unionfs: read_super: error while parsing options "
7197+ "(err = %ld)\n", PTR_ERR(lower_root_info));
7198+ err = PTR_ERR(lower_root_info);
7199+ lower_root_info = NULL;
7200+ goto out_free;
7201+ }
7202+ if (lower_root_info->bstart == -1) {
7203+ err = -ENOENT;
7204+ goto out_free;
7205+ }
7206+
7207+ /* set the lower superblock field of upper superblock */
7208+ bstart = lower_root_info->bstart;
7209+ BUG_ON(bstart != 0);
7210+ sbend(sb) = bend = lower_root_info->bend;
7211+ for (bindex = bstart; bindex <= bend; bindex++) {
7212+ struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7213+ atomic_inc(&d->d_sb->s_active);
7214+ unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7215+ }
7216+
7217+ /* max Bytes is the maximum bytes from highest priority branch */
7218+ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7219+
7220+ /*
7221+ * Our c/m/atime granularity is 1 ns because we may stack on file
7222+ * systems whose granularity is as good. This is important for our
7223+ * time-based cache coherency.
7224+ */
7225+ sb->s_time_gran = 1;
7226+
7227+ sb->s_op = &unionfs_sops;
7228+
7229+ /* See comment next to the definition of unionfs_d_alloc_root */
7230+ sb->s_root = unionfs_d_alloc_root(sb);
7231+ if (unlikely(!sb->s_root)) {
7232+ err = -ENOMEM;
7233+ goto out_dput;
7234+ }
7235+
7236+ /* link the upper and lower dentries */
7237+ sb->s_root->d_fsdata = NULL;
7238+ err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
7239+ if (unlikely(err))
7240+ goto out_freedpd;
7241+
7242+ /* Set the lower dentries for s_root */
7243+ for (bindex = bstart; bindex <= bend; bindex++) {
7244+ struct dentry *d;
7245+ struct vfsmount *m;
7246+
7247+ d = lower_root_info->lower_paths[bindex].dentry;
7248+ m = lower_root_info->lower_paths[bindex].mnt;
7249+
7250+ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7251+ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7252+ }
7253+ dbstart(sb->s_root) = bstart;
7254+ dbend(sb->s_root) = bend;
7255+
7256+ /* Set the generation number to one, since this is for the mount. */
7257+ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7258+
7259+ /*
7260+ * Call interpose to create the upper level inode. Only
7261+ * INTERPOSE_LOOKUP can return a value other than 0 on err.
7262+ */
7263+ err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7264+ unionfs_unlock_dentry(sb->s_root);
7265+ if (!err)
7266+ goto out;
7267+ /* else fall through */
7268+
7269+out_freedpd:
7270+ if (UNIONFS_D(sb->s_root)) {
7271+ kfree(UNIONFS_D(sb->s_root)->lower_paths);
7272+ free_dentry_private_data(sb->s_root);
7273+ }
7274+ dput(sb->s_root);
7275+
7276+out_dput:
7277+ if (lower_root_info && !IS_ERR(lower_root_info)) {
7278+ for (bindex = lower_root_info->bstart;
7279+ bindex <= lower_root_info->bend; bindex++) {
7280+ struct dentry *d;
7281+ d = lower_root_info->lower_paths[bindex].dentry;
7282+ /* drop refs we took earlier */
7283+ atomic_dec(&d->d_sb->s_active);
7284+ path_put(&lower_root_info->lower_paths[bindex]);
7285+ }
7286+ kfree(lower_root_info->lower_paths);
7287+ kfree(lower_root_info);
7288+ lower_root_info = NULL;
7289+ }
7290+
7291+out_free:
7292+ kfree(UNIONFS_SB(sb)->data);
7293+ kfree(UNIONFS_SB(sb));
7294+ sb->s_fs_info = NULL;
7295+
7296+out:
7297+ if (lower_root_info && !IS_ERR(lower_root_info)) {
7298+ kfree(lower_root_info->lower_paths);
7299+ kfree(lower_root_info);
7300+ }
7301+ return err;
7302+}
7303+
7304+static struct dentry *unionfs_mount(struct file_system_type *fs_type,
7305+ int flags, const char *dev_name,
7306+ void *raw_data)
7307+{
7308+ struct dentry *dentry;
7309+
7310+ dentry = mount_nodev(fs_type, flags, raw_data, unionfs_read_super);
7311+ if (!PTR_ERR(dentry))
7312+ UNIONFS_SB(dentry->d_sb)->dev_name =
7313+ kstrdup(dev_name, GFP_KERNEL);
7314+ return dentry;
7315+}
7316+
7317+static struct file_system_type unionfs_fs_type = {
7318+ .owner = THIS_MODULE,
7319+ .name = UNIONFS_NAME,
7320+ .mount = unionfs_mount,
7321+ .kill_sb = generic_shutdown_super,
7322+ .fs_flags = FS_REVAL_DOT,
7323+};
7324+
7325+static int __init init_unionfs_fs(void)
7326+{
7327+ int err;
7328+
7329+ pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7330+
7331+ err = unionfs_init_filldir_cache();
7332+ if (unlikely(err))
7333+ goto out;
7334+ err = unionfs_init_inode_cache();
7335+ if (unlikely(err))
7336+ goto out;
7337+ err = unionfs_init_dentry_cache();
7338+ if (unlikely(err))
7339+ goto out;
7340+ err = init_sioq();
7341+ if (unlikely(err))
7342+ goto out;
7343+ err = register_filesystem(&unionfs_fs_type);
7344+out:
7345+ if (unlikely(err)) {
7346+ stop_sioq();
7347+ unionfs_destroy_filldir_cache();
7348+ unionfs_destroy_inode_cache();
7349+ unionfs_destroy_dentry_cache();
7350+ }
7351+ return err;
7352+}
7353+
7354+static void __exit exit_unionfs_fs(void)
7355+{
7356+ stop_sioq();
7357+ unionfs_destroy_filldir_cache();
7358+ unionfs_destroy_inode_cache();
7359+ unionfs_destroy_dentry_cache();
7360+ unregister_filesystem(&unionfs_fs_type);
7361+ pr_info("Completed unionfs module unload\n");
7362+}
7363+
7364+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7365+ " (http://www.fsl.cs.sunysb.edu)");
7366+MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7367+ " (http://unionfs.filesystems.org)");
7368+MODULE_LICENSE("GPL");
7369+
7370+module_init(init_unionfs_fs);
7371+module_exit(exit_unionfs_fs);
7372diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7373new file mode 100644
7374index 0000000..bcc5652
7375--- /dev/null
7376+++ b/fs/unionfs/mmap.c
7377@@ -0,0 +1,89 @@
7378+/*
7379+ * Copyright (c) 2003-2011 Erez Zadok
7380+ * Copyright (c) 2003-2006 Charles P. Wright
7381+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7382+ * Copyright (c) 2005-2006 Junjiro Okajima
7383+ * Copyright (c) 2006 Shaya Potter
7384+ * Copyright (c) 2005 Arun M. Krishnakumar
7385+ * Copyright (c) 2004-2006 David P. Quigley
7386+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7387+ * Copyright (c) 2003 Puja Gupta
7388+ * Copyright (c) 2003 Harikesavan Krishnan
7389+ * Copyright (c) 2003-2011 Stony Brook University
7390+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
7391+ *
7392+ * This program is free software; you can redistribute it and/or modify
7393+ * it under the terms of the GNU General Public License version 2 as
7394+ * published by the Free Software Foundation.
7395+ */
7396+
7397+#include "union.h"
7398+
7399+
7400+/*
7401+ * XXX: we need a dummy readpage handler because generic_file_mmap (which we
7402+ * use in unionfs_mmap) checks for the existence of
7403+ * mapping->a_ops->readpage, else it returns -ENOEXEC. The VFS will need to
7404+ * be fixed to allow a file system to define vm_ops->fault without any
7405+ * address_space_ops whatsoever.
7406+ *
7407+ * Otherwise, we don't want to use our readpage method at all.
7408+ */
7409+static int unionfs_readpage(struct file *file, struct page *page)
7410+{
7411+ BUG();
7412+ return -EINVAL;
7413+}
7414+
7415+static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
7416+{
7417+ int err;
7418+ struct file *file, *lower_file;
7419+ const struct vm_operations_struct *lower_vm_ops;
7420+ struct vm_area_struct lower_vma;
7421+
7422+ BUG_ON(!vma);
7423+ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
7424+ file = lower_vma.vm_file;
7425+ lower_vm_ops = UNIONFS_F(file)->lower_vm_ops;
7426+ BUG_ON(!lower_vm_ops);
7427+
7428+ lower_file = unionfs_lower_file(file);
7429+ BUG_ON(!lower_file);
7430+ /*
7431+ * XXX: vm_ops->fault may be called in parallel. Because we have to
7432+ * resort to temporarily changing the vma->vm_file to point to the
7433+ * lower file, a concurrent invocation of unionfs_fault could see a
7434+ * different value. In this workaround, we keep a different copy of
7435+ * the vma structure in our stack, so we never expose a different
7436+ * value of the vma->vm_file called to us, even temporarily. A
7437+ * better fix would be to change the calling semantics of ->fault to
7438+ * take an explicit file pointer.
7439+ */
7440+ lower_vma.vm_file = lower_file;
7441+ err = lower_vm_ops->fault(&lower_vma, vmf);
7442+ return err;
7443+}
7444+
7445+/*
7446+ * XXX: the default address_space_ops for unionfs is empty. We cannot set
7447+ * our inode->i_mapping->a_ops to NULL because too many code paths expect
7448+ * the a_ops vector to be non-NULL.
7449+ */
7450+struct address_space_operations unionfs_aops = {
7451+ /* empty on purpose */
7452+};
7453+
7454+/*
7455+ * XXX: we need a second, dummy address_space_ops vector, to be used
7456+ * temporarily during unionfs_mmap, because the latter calls
7457+ * generic_file_mmap, which checks if ->readpage exists, else returns
7458+ * -ENOEXEC.
7459+ */
7460+struct address_space_operations unionfs_dummy_aops = {
7461+ .readpage = unionfs_readpage,
7462+};
7463+
7464+struct vm_operations_struct unionfs_vm_ops = {
7465+ .fault = unionfs_fault,
7466+};
7467diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7468new file mode 100644
7469index 0000000..59b7333
7470--- /dev/null
7471+++ b/fs/unionfs/rdstate.c
7472@@ -0,0 +1,285 @@
7473+/*
7474+ * Copyright (c) 2003-2011 Erez Zadok
7475+ * Copyright (c) 2003-2006 Charles P. Wright
7476+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7477+ * Copyright (c) 2005-2006 Junjiro Okajima
7478+ * Copyright (c) 2005 Arun M. Krishnakumar
7479+ * Copyright (c) 2004-2006 David P. Quigley
7480+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7481+ * Copyright (c) 2003 Puja Gupta
7482+ * Copyright (c) 2003 Harikesavan Krishnan
7483+ * Copyright (c) 2003-2011 Stony Brook University
7484+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
7485+ *
7486+ * This program is free software; you can redistribute it and/or modify
7487+ * it under the terms of the GNU General Public License version 2 as
7488+ * published by the Free Software Foundation.
7489+ */
7490+
7491+#include "union.h"
7492+
7493+/* This file contains the routines for maintaining readdir state. */
7494+
7495+/*
7496+ * There are two structures here, rdstate which is a hash table
7497+ * of the second structure which is a filldir_node.
7498+ */
7499+
7500+/*
7501+ * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7502+ * of them and they shouldn't waste memory. If the node has a small name
7503+ * (as defined by the dentry structure), then we use an inline name to
7504+ * preserve kmalloc space.
7505+ */
7506+static struct kmem_cache *unionfs_filldir_cachep;
7507+
7508+int unionfs_init_filldir_cache(void)
7509+{
7510+ unionfs_filldir_cachep =
7511+ kmem_cache_create("unionfs_filldir",
7512+ sizeof(struct filldir_node), 0,
7513+ SLAB_RECLAIM_ACCOUNT, NULL);
7514+
7515+ return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7516+}
7517+
7518+void unionfs_destroy_filldir_cache(void)
7519+{
7520+ if (unionfs_filldir_cachep)
7521+ kmem_cache_destroy(unionfs_filldir_cachep);
7522+}
7523+
7524+/*
7525+ * This is a tuning parameter that tells us roughly how big to make the
7526+ * hash table in directory entries per page. This isn't perfect, but
7527+ * at least we get a hash table size that shouldn't be too overloaded.
7528+ * The following averages are based on my home directory.
7529+ * 14.44693 Overall
7530+ * 12.29 Single Page Directories
7531+ * 117.93 Multi-page directories
7532+ */
7533+#define DENTPAGE 4096
7534+#define DENTPERONEPAGE 12
7535+#define DENTPERPAGE 118
7536+#define MINHASHSIZE 1
7537+static int guesstimate_hash_size(struct inode *inode)
7538+{
7539+ struct inode *lower_inode;
7540+ int bindex;
7541+ int hashsize = MINHASHSIZE;
7542+
7543+ if (UNIONFS_I(inode)->hashsize > 0)
7544+ return UNIONFS_I(inode)->hashsize;
7545+
7546+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7547+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
7548+ if (!lower_inode)
7549+ continue;
7550+
7551+ if (i_size_read(lower_inode) == DENTPAGE)
7552+ hashsize += DENTPERONEPAGE;
7553+ else
7554+ hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7555+ DENTPERPAGE;
7556+ }
7557+
7558+ return hashsize;
7559+}
7560+
7561+int init_rdstate(struct file *file)
7562+{
7563+ BUG_ON(sizeof(loff_t) !=
7564+ (sizeof(unsigned int) + sizeof(unsigned int)));
7565+ BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7566+
7567+ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7568+ fbstart(file));
7569+
7570+ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7571+}
7572+
7573+struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7574+{
7575+ struct unionfs_dir_state *rdstate = NULL;
7576+ struct list_head *pos;
7577+
7578+ spin_lock(&UNIONFS_I(inode)->rdlock);
7579+ list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7580+ struct unionfs_dir_state *r =
7581+ list_entry(pos, struct unionfs_dir_state, cache);
7582+ if (fpos == rdstate2offset(r)) {
7583+ UNIONFS_I(inode)->rdcount--;
7584+ list_del(&r->cache);
7585+ rdstate = r;
7586+ break;
7587+ }
7588+ }
7589+ spin_unlock(&UNIONFS_I(inode)->rdlock);
7590+ return rdstate;
7591+}
7592+
7593+struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7594+{
7595+ int i = 0;
7596+ int hashsize;
7597+ unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7598+ struct unionfs_dir_state *rdstate;
7599+
7600+ hashsize = guesstimate_hash_size(inode);
7601+ mallocsize += hashsize * sizeof(struct list_head);
7602+ mallocsize = __roundup_pow_of_two(mallocsize);
7603+
7604+ /* This should give us about 500 entries anyway. */
7605+ if (mallocsize > PAGE_SIZE)
7606+ mallocsize = PAGE_SIZE;
7607+
7608+ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7609+ sizeof(struct list_head);
7610+
7611+ rdstate = kmalloc(mallocsize, GFP_KERNEL);
7612+ if (unlikely(!rdstate))
7613+ return NULL;
7614+
7615+ spin_lock(&UNIONFS_I(inode)->rdlock);
7616+ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7617+ UNIONFS_I(inode)->cookie = 1;
7618+ else
7619+ UNIONFS_I(inode)->cookie++;
7620+
7621+ rdstate->cookie = UNIONFS_I(inode)->cookie;
7622+ spin_unlock(&UNIONFS_I(inode)->rdlock);
7623+ rdstate->offset = 1;
7624+ rdstate->access = jiffies;
7625+ rdstate->bindex = bindex;
7626+ rdstate->dirpos = 0;
7627+ rdstate->hashentries = 0;
7628+ rdstate->size = hashsize;
7629+ for (i = 0; i < rdstate->size; i++)
7630+ INIT_LIST_HEAD(&rdstate->list[i]);
7631+
7632+ return rdstate;
7633+}
7634+
7635+static void free_filldir_node(struct filldir_node *node)
7636+{
7637+ if (node->namelen >= DNAME_INLINE_LEN)
7638+ kfree(node->name);
7639+ kmem_cache_free(unionfs_filldir_cachep, node);
7640+}
7641+
7642+void free_rdstate(struct unionfs_dir_state *state)
7643+{
7644+ struct filldir_node *tmp;
7645+ int i;
7646+
7647+ for (i = 0; i < state->size; i++) {
7648+ struct list_head *head = &(state->list[i]);
7649+ struct list_head *pos, *n;
7650+
7651+ /* traverse the list and deallocate space */
7652+ list_for_each_safe(pos, n, head) {
7653+ tmp = list_entry(pos, struct filldir_node, file_list);
7654+ list_del(&tmp->file_list);
7655+ free_filldir_node(tmp);
7656+ }
7657+ }
7658+
7659+ kfree(state);
7660+}
7661+
7662+struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7663+ const char *name, int namelen,
7664+ int is_whiteout)
7665+{
7666+ int index;
7667+ unsigned int hash;
7668+ struct list_head *head;
7669+ struct list_head *pos;
7670+ struct filldir_node *cursor = NULL;
7671+ int found = 0;
7672+
7673+ BUG_ON(namelen <= 0);
7674+
7675+ hash = full_name_hash(name, namelen);
7676+ index = hash % rdstate->size;
7677+
7678+ head = &(rdstate->list[index]);
7679+ list_for_each(pos, head) {
7680+ cursor = list_entry(pos, struct filldir_node, file_list);
7681+
7682+ if (cursor->namelen == namelen && cursor->hash == hash &&
7683+ !strncmp(cursor->name, name, namelen)) {
7684+ /*
7685+ * a duplicate exists, and hence no need to create
7686+ * entry to the list
7687+ */
7688+ found = 1;
7689+
7690+ /*
7691+ * if a duplicate is found in this branch, and is
7692+ * not due to the caller looking for an entry to
7693+ * whiteout, then the file system may be corrupted.
7694+ */
7695+ if (unlikely(!is_whiteout &&
7696+ cursor->bindex == rdstate->bindex))
7697+ printk(KERN_ERR "unionfs: filldir: possible "
7698+ "I/O error: a file is duplicated "
7699+ "in the same branch %d: %s\n",
7700+ rdstate->bindex, cursor->name);
7701+ break;
7702+ }
7703+ }
7704+
7705+ if (!found)
7706+ cursor = NULL;
7707+
7708+ return cursor;
7709+}
7710+
7711+int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7712+ int namelen, int bindex, int whiteout)
7713+{
7714+ struct filldir_node *new;
7715+ unsigned int hash;
7716+ int index;
7717+ int err = 0;
7718+ struct list_head *head;
7719+
7720+ BUG_ON(namelen <= 0);
7721+
7722+ hash = full_name_hash(name, namelen);
7723+ index = hash % rdstate->size;
7724+ head = &(rdstate->list[index]);
7725+
7726+ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7727+ if (unlikely(!new)) {
7728+ err = -ENOMEM;
7729+ goto out;
7730+ }
7731+
7732+ INIT_LIST_HEAD(&new->file_list);
7733+ new->namelen = namelen;
7734+ new->hash = hash;
7735+ new->bindex = bindex;
7736+ new->whiteout = whiteout;
7737+
7738+ if (namelen < DNAME_INLINE_LEN) {
7739+ new->name = new->iname;
7740+ } else {
7741+ new->name = kmalloc(namelen + 1, GFP_KERNEL);
7742+ if (unlikely(!new->name)) {
7743+ kmem_cache_free(unionfs_filldir_cachep, new);
7744+ new = NULL;
7745+ goto out;
7746+ }
7747+ }
7748+
7749+ memcpy(new->name, name, namelen);
7750+ new->name[namelen] = '\0';
7751+
7752+ rdstate->hashentries++;
7753+
7754+ list_add(&(new->file_list), head);
7755+out:
7756+ return err;
7757+}
7758diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7759new file mode 100644
7760index 0000000..c8ab910
7761--- /dev/null
7762+++ b/fs/unionfs/rename.c
7763@@ -0,0 +1,522 @@
7764+/*
7765+ * Copyright (c) 2003-2011 Erez Zadok
7766+ * Copyright (c) 2003-2006 Charles P. Wright
7767+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7768+ * Copyright (c) 2005-2006 Junjiro Okajima
7769+ * Copyright (c) 2005 Arun M. Krishnakumar
7770+ * Copyright (c) 2004-2006 David P. Quigley
7771+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7772+ * Copyright (c) 2003 Puja Gupta
7773+ * Copyright (c) 2003 Harikesavan Krishnan
7774+ * Copyright (c) 2003-2011 Stony Brook University
7775+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
7776+ *
7777+ * This program is free software; you can redistribute it and/or modify
7778+ * it under the terms of the GNU General Public License version 2 as
7779+ * published by the Free Software Foundation.
7780+ */
7781+
7782+#include "union.h"
7783+
7784+/*
7785+ * Helper for rename's revert path, used when rename ends up with hosed-over dentries: looks up @dentry's name again in the lower directory of @parent at
7786+ * branch @bindex and replaces @dentry's lower dentry/inode for that branch with the result. Returns 0, or a negative error (lower pointers left untouched).
7787+ */
7788+static int unionfs_refresh_lower_dentry(struct dentry *dentry,
7789+ struct dentry *parent, int bindex)
7790+{
7791+ struct dentry *lower_dentry;
7792+ struct dentry *lower_parent;
7793+ int err = 0;
7794+ struct nameidata lower_nd;
7795+
7796+ verify_locked(dentry);
7797+
7798+ lower_parent = unionfs_lower_dentry_idx(parent, bindex);
7799+
7800+ BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode)); /* the lower parent must be a directory */
7801+
7802+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
7803+ if (unlikely(err < 0))
7804+ goto out;
7805+ lower_dentry = lookup_one_len_nd(dentry->d_name.name, lower_parent,
7806+ dentry->d_name.len, &lower_nd);
7807+ release_lower_nd(&lower_nd, err); /* err is still the (non-negative) init_lower_nd() result here */
7808+ if (IS_ERR(lower_dentry)) {
7809+ err = PTR_ERR(lower_dentry);
7810+ goto out;
7811+ }
7812+
7813+ dput(unionfs_lower_dentry_idx(dentry, bindex)); /* drop the references on the old lower dentry and inode */
7814+ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
7815+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
7816+
7817+ if (!lower_dentry->d_inode) { /* negative result: nothing by that name in this branch, so clear the slot */
7818+ dput(lower_dentry);
7819+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
7820+ } else { /* positive result: install the new lower dentry and a referenced lower inode */
7821+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
7822+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
7823+ igrab(lower_dentry->d_inode));
7824+ }
7825+
7826+out:
7827+ return err;
7828+}
7829+
7830+static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7831+ struct dentry *old_parent,
7832+ struct inode *new_dir, struct dentry *new_dentry,
7833+ struct dentry *new_parent,
7834+ int bindex)
7835+{ /* Rename the lower object of old_dentry to the lower name of new_dentry within the single branch @bindex. Returns 0 or a negative errno. */
7836+ int err = 0;
7837+ struct dentry *lower_old_dentry;
7838+ struct dentry *lower_new_dentry;
7839+ struct dentry *lower_old_dir_dentry;
7840+ struct dentry *lower_new_dir_dentry;
7841+ struct dentry *trap; /* value returned by lock_rename(), compared against both lower dentries below */
7842+
7843+ lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7844+ lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7845+
7846+ if (!lower_new_dentry) { /* target has no lower dentry in this branch yet: build the lower parent path */
7847+ lower_new_dentry =
7848+ create_parents(new_parent->d_inode,
7849+ new_dentry, new_dentry->d_name.name,
7850+ bindex);
7851+ if (IS_ERR(lower_new_dentry)) {
7852+ err = PTR_ERR(lower_new_dentry);
7853+ if (IS_COPYUP_ERR(err)) /* copyup-class errors are returned quietly so the caller can retry in another branch */
7854+ goto out;
7855+ printk(KERN_ERR "unionfs: error creating directory "
7856+ "tree for rename, bindex=%d err=%d\n",
7857+ bindex, err);
7858+ goto out;
7859+ }
7860+ }
7861+
7862+ /* check for and remove whiteout, if any */
7863+ err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex);
7864+ if (err > 0) /* ignore if whiteout found and successfully removed */
7865+ err = 0;
7866+ if (err)
7867+ goto out;
7868+
7869+ /* check if old_dentry's branch is writable */
7870+ err = is_robranch_super(old_dentry->d_sb, bindex);
7871+ if (err)
7872+ goto out;
7873+
7874+ dget(lower_old_dentry); /* hold references on both lower dentries and their parents across the rename */
7875+ dget(lower_new_dentry);
7876+ lower_old_dir_dentry = dget_parent(lower_old_dentry);
7877+ lower_new_dir_dentry = dget_parent(lower_new_dentry);
7878+
7879+ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7880+ /* source should not be ancestor of target */
7881+ if (trap == lower_old_dentry) {
7882+ err = -EINVAL;
7883+ goto out_err_unlock;
7884+ }
7885+ /* target should not be ancestor of source */
7886+ if (trap == lower_new_dentry) {
7887+ err = -ENOTEMPTY;
7888+ goto out_err_unlock;
7889+ }
7890+ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7891+ lower_new_dir_dentry->d_inode, lower_new_dentry);
7892+out_err_unlock: /* reached on success as well as on error, despite the label name */
7893+ if (!err) {
7894+ /* update parent dir times */
7895+ fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode);
7896+ fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode);
7897+ }
7898+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7899+
7900+ dput(lower_old_dir_dentry);
7901+ dput(lower_new_dir_dentry);
7902+ dput(lower_old_dentry);
7903+ dput(lower_new_dentry);
7904+
7905+out:
7906+ if (!err) {
7907+ /* Fixup the new_dentry: widen its [dbstart, dbend] range so that it includes bindex. */
7908+ if (bindex < dbstart(new_dentry))
7909+ dbstart(new_dentry) = bindex;
7910+ else if (bindex > dbend(new_dentry))
7911+ dbend(new_dentry) = bindex;
7912+ }
7913+
7914+ return err;
7915+}
7916+
7917+/*
7918+ * Main rename code. This is sufficiently complex, that it's documented in
7919+ * Documentation/filesystems/unionfs/rename.txt. This routine calls
7920+ * __unionfs_rename() above to perform some of the work. Returns 0 or a negative errno; -EIO is returned when a failure could not be cleanly undone.
7921+ */
7922+static int do_unionfs_rename(struct inode *old_dir,
7923+ struct dentry *old_dentry,
7924+ struct dentry *old_parent,
7925+ struct inode *new_dir,
7926+ struct dentry *new_dentry,
7927+ struct dentry *new_parent)
7928+{
7929+ int err = 0;
7930+ int bindex;
7931+ int old_bstart, old_bend;
7932+ int new_bstart, new_bend; /* NOTE(review): new_bend is assigned below but not read anywhere in this function */
7933+ int do_copyup = -1; /* -1: no copyup needed; otherwise the first branch index (scanning leftwards) to try copying up into */
7934+ int local_err = 0; /* result of the individual steps of the revert path */
7935+ int eio = 0; /* becomes -EIO as soon as any revert step fails */
7936+ int revert = 0; /* set once the lower rename at old_bstart succeeded, so that later failures undo it */
7937+
7938+ old_bstart = dbstart(old_dentry);
7939+ old_bend = dbend(old_dentry);
7940+
7941+ new_bstart = dbstart(new_dentry);
7942+ new_bend = dbend(new_dentry);
7943+
7944+ /* Rename source to destination. */
7945+ err = __unionfs_rename(old_dir, old_dentry, old_parent,
7946+ new_dir, new_dentry, new_parent,
7947+ old_bstart);
7948+ if (err) {
7949+ if (!IS_COPYUP_ERR(err))
7950+ goto out;
7951+ do_copyup = old_bstart - 1; /* copyup-class error: retry below in a branch to the left of the source */
7952+ } else {
7953+ revert = 1;
7954+ }
7955+
7956+ /*
7957+ * Unlink all instances of destination that exist to the left of
7958+ * bstart of source. On error, revert back, goto out.
7959+ */
7960+ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
7961+ struct dentry *unlink_dentry;
7962+ struct dentry *unlink_dir_dentry;
7963+
7964+ BUG_ON(bindex < 0);
7965+ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7966+ if (!unlink_dentry)
7967+ continue; /* the destination has no lower dentry in this branch */
7968+
7969+ unlink_dir_dentry = lock_parent(unlink_dentry);
7970+ err = is_robranch_super(old_dir->i_sb, bindex);
7971+ if (!err)
7972+ err = vfs_unlink(unlink_dir_dentry->d_inode,
7973+ unlink_dentry);
7974+
7975+ fsstack_copy_attr_times(new_parent->d_inode,
7976+ unlink_dir_dentry->d_inode);
7977+ /* propagate number of hard-links */
7978+ new_parent->d_inode->i_nlink =
7979+ unionfs_get_nlinks(new_parent->d_inode);
7980+
7981+ unlock_dir(unlink_dir_dentry);
7982+ if (!err) {
7983+ if (bindex != new_bstart) { /* the lower dentry at new_bstart is kept; the others are released and cleared */
7984+ dput(unlink_dentry);
7985+ unionfs_set_lower_dentry_idx(new_dentry,
7986+ bindex, NULL);
7987+ }
7988+ } else if (IS_COPYUP_ERR(err)) {
7989+ do_copyup = bindex - 1; /* this branch is unusable too: start the copyup search further left */
7990+ } else if (revert) {
7991+ goto revert;
7992+ }
7993+ }
7994+
7995+ if (do_copyup != -1) {
7996+ for (bindex = do_copyup; bindex >= 0; bindex--) {
7997+ /*
7998+ * copyup the file into some left directory, so that
7999+ * you can rename it
8000+ */
8001+ err = copyup_dentry(old_parent->d_inode,
8002+ old_dentry, old_bstart, bindex,
8003+ old_dentry->d_name.name,
8004+ old_dentry->d_name.len, NULL,
8005+ i_size_read(old_dentry->d_inode));
8006+ /* if copyup failed, try next branch to the left */
8007+ if (err)
8008+ continue;
8009+ /*
8010+ * create whiteout before calling __unionfs_rename
8011+ * because the latter will change the old_dentry's
8012+ * lower name and parent dir, resulting in the
8013+ * whiteout getting created in the wrong dir.
8014+ */
8015+ err = create_whiteout(old_dentry, bindex);
8016+ if (err) {
8017+ printk(KERN_ERR "unionfs: can't create a "
8018+ "whiteout for %s in rename (err=%d)\n",
8019+ old_dentry->d_name.name, err);
8020+ continue;
8021+ }
8022+ err = __unionfs_rename(old_dir, old_dentry, old_parent,
8023+ new_dir, new_dentry, new_parent,
8024+ bindex);
8025+ break; /* only one rename attempt is made; its result, good or bad, ends the search */
8026+ }
8027+ }
8028+
8029+ /* make it opaque */
8030+ if (S_ISDIR(old_dentry->d_inode->i_mode)) {
8031+ err = make_dir_opaque(old_dentry, dbstart(old_dentry));
8032+ if (err)
8033+ goto revert;
8034+ }
8035+
8036+ /*
8037+ * Create whiteout for source, only if:
8038+ * (1) There is more than one underlying instance of source.
8039+ * (The case where we did a copy-up is taken care of above.)
8040+ */
8041+ if ((old_bstart != old_bend) && (do_copyup == -1)) {
8042+ err = create_whiteout(old_dentry, old_bstart);
8043+ if (err) {
8044+ /* can't fix anything now, so we exit with -EIO */
8045+ printk(KERN_ERR "unionfs: can't create a whiteout for "
8046+ "%s in rename!\n", old_dentry->d_name.name);
8047+ err = -EIO;
8048+ }
8049+ }
8050+
8051+out:
8052+ return err;
8053+
8054+revert: /* NOTE(review): bindex below is whatever the loops above left it at; when reached from the make_dir_opaque() failure it may not name a valid branch - confirm */
8055+ /* Do revert here. */
8056+ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
8057+ old_bstart);
8058+ if (local_err) {
8059+ printk(KERN_ERR "unionfs: revert failed in rename: "
8060+ "the new refresh failed\n");
8061+ eio = -EIO;
8062+ }
8063+
8064+ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
8065+ old_bstart);
8066+ if (local_err) {
8067+ printk(KERN_ERR "unionfs: revert failed in rename: "
8068+ "the old refresh failed\n");
8069+ eio = -EIO;
8070+ goto revert_out;
8071+ }
8072+
8073+ if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
8074+ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
8075+ printk(KERN_ERR "unionfs: revert failed in rename: "
8076+ "the object disappeared from under us!\n");
8077+ eio = -EIO;
8078+ goto revert_out;
8079+ }
8080+
8081+ if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
8082+ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
8083+ printk(KERN_ERR "unionfs: revert failed in rename: "
8084+ "the object was created underneath us!\n");
8085+ eio = -EIO;
8086+ goto revert_out;
8087+ }
8088+
8089+ local_err = __unionfs_rename(new_dir, new_dentry, new_parent, /* rename back: the new/old arguments are swapped */
8090+ old_dir, old_dentry, old_parent,
8091+ old_bstart);
8092+
8093+ /* If we can't fix it, then we cop-out with -EIO. */
8094+ if (local_err) {
8095+ printk(KERN_ERR "unionfs: revert failed in rename!\n");
8096+ eio = -EIO;
8097+ }
8098+
8099+ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
8100+ bindex);
8101+ if (local_err)
8102+ eio = -EIO;
8103+ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
8104+ bindex);
8105+ if (local_err)
8106+ eio = -EIO;
8107+
8108+revert_out:
8109+ if (eio) /* a failed revert overrides the original error */
8110+ err = eio;
8111+ return err;
8112+}
8113+
8114+/*
8115+ * We can't copyup a directory, because it may involve huge numbers of
8116+ * children, etc. Doing that in the kernel would be bad, so instead we
8117+ * return EXDEV to the user-space utility that caused this, and let the
8118+ * user-space recurse and ask us to copy up each file separately. Returns 0 if the directory may be renamed in place, -EXDEV as described, or another negative errno from check_empty().
8119+ */
8120+static int may_rename_dir(struct dentry *dentry, struct dentry *parent)
8121+{
8122+ int err, bstart;
8123+
8124+ err = check_empty(dentry, parent, NULL);
8125+ if (err == -ENOTEMPTY) {
8126+ if (is_robranch(dentry)) /* non-empty and is_robranch() reports an error: hand the job to user space */
8127+ return -EXDEV;
8128+ } else if (err) {
8129+ return err;
8130+ }
8131+
8132+ bstart = dbstart(dentry);
8133+ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart) /* a single branch, or opaque at bstart: no further branches to check */
8134+ return 0;
8135+
8136+ dbstart(dentry) = bstart + 1; /* temporarily skip the first branch so check_empty() starts from the next one */
8137+ err = check_empty(dentry, parent, NULL);
8138+ dbstart(dentry) = bstart; /* restore the real start branch */
8139+ if (err == -ENOTEMPTY)
8140+ err = -EXDEV;
8141+ return err;
8142+}
8143+
8144+/*
8145+ * The locking rules in unionfs_rename are complex. We could use a simpler
8146+ * superblock-level name-space lock for renames and copy-ups. Returns 0 on success or a negative errno; on any error new_dentry is d_drop()ped.
8147+ */
8148+int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8149+ struct inode *new_dir, struct dentry *new_dentry)
8150+{ /* Locks and revalidates both dentries, checks an existing target for type and emptiness, then delegates to do_unionfs_rename(). */
8151+ int err = 0;
8152+ struct dentry *wh_dentry;
8153+ struct dentry *old_parent, *new_parent;
8154+ int valid = true;
8155+
8156+ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
8157+ old_parent = dget_parent(old_dentry);
8158+ new_parent = dget_parent(new_dentry);
8159+ /* un/lock parent dentries only if they differ from old/new_dentry */
8160+ if (old_parent != old_dentry &&
8161+ old_parent != new_dentry)
8162+ unionfs_lock_dentry(old_parent, UNIONFS_DMUTEX_REVAL_PARENT);
8163+ if (new_parent != old_dentry &&
8164+ new_parent != new_dentry &&
8165+ new_parent != old_parent) /* also skip when both dentries share one parent, which is already locked above */
8166+ unionfs_lock_dentry(new_parent, UNIONFS_DMUTEX_REVAL_CHILD);
8167+ unionfs_double_lock_dentry(old_dentry, new_dentry);
8168+
8169+ valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
8170+ if (!valid) {
8171+ err = -ESTALE;
8172+ goto out;
8173+ }
8174+ if (!d_deleted(new_dentry) && new_dentry->d_inode) { /* revalidate the target only if it is a live, positive dentry */
8175+ valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
8176+ if (!valid) {
8177+ err = -ESTALE;
8178+ goto out;
8179+ }
8180+ }
8181+
8182+ if (!S_ISDIR(old_dentry->d_inode->i_mode))
8183+ err = unionfs_partial_lookup(old_dentry, old_parent);
8184+ else
8185+ err = may_rename_dir(old_dentry, old_parent); /* directories: may yield -EXDEV so that user space copies the children itself */
8186+
8187+ if (err)
8188+ goto out;
8189+
8190+ err = unionfs_partial_lookup(new_dentry, new_parent);
8191+ if (err)
8192+ goto out;
8193+
8194+ /*
8195+ * if new_dentry is already lower because of whiteout,
8196+ * simply override it even if the whited-out dir is not empty.
8197+ */
8198+ wh_dentry = find_first_whiteout(new_dentry);
8199+ if (!IS_ERR(wh_dentry)) {
8200+ dput(wh_dentry);
8201+ } else if (new_dentry->d_inode) { /* no whiteout found and the target exists: apply the usual type and emptiness checks */
8202+ if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8203+ S_ISDIR(new_dentry->d_inode->i_mode)) {
8204+ err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8205+ -ENOTDIR : -EISDIR;
8206+ goto out;
8207+ }
8208+
8209+ if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8210+ struct unionfs_dir_state *namelist = NULL;
8211+ /* check if this unionfs directory is empty or not */
8212+ err = check_empty(new_dentry, new_parent, &namelist);
8213+ if (err)
8214+ goto out;
8215+
8216+ if (!is_robranch(new_dentry))
8217+ err = delete_whiteouts(new_dentry,
8218+ dbstart(new_dentry),
8219+ namelist);
8220+
8221+ free_rdstate(namelist); /* namelist was filled in by check_empty() and is released on both outcomes */
8222+
8223+ if (err)
8224+ goto out;
8225+ }
8226+ }
8227+
8228+ err = do_unionfs_rename(old_dir, old_dentry, old_parent,
8229+ new_dir, new_dentry, new_parent);
8230+ if (err)
8231+ goto out;
8232+
8233+ /*
8234+ * force re-lookup since the dir on ro branch is not renamed, and
8235+ * lower dentries still indicate the un-renamed ones.
8236+ */
8237+ if (S_ISDIR(old_dentry->d_inode->i_mode))
8238+ atomic_dec(&UNIONFS_D(old_dentry)->generation);
8239+ else
8240+ unionfs_postcopyup_release(old_dentry);
8241+ if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8242+ unionfs_postcopyup_release(new_dentry);
8243+ unionfs_postcopyup_setmnt(new_dentry);
8244+ if (!unionfs_lower_inode(new_dentry->d_inode)) {
8245+ /*
8246+ * If we get here, it means that no copyup was
8247+ * needed, and that a file by the old name already
8248+ * existed on the destination branch; that file got
8249+ * renamed earlier in this function, so all we need
8250+ * to do here is set the lower inode.
8251+ */
8252+ struct inode *inode;
8253+ inode = unionfs_lower_inode(old_dentry->d_inode);
8254+ igrab(inode); /* take the reference that the new lower-inode slot will hold */
8255+ unionfs_set_lower_inode_idx(new_dentry->d_inode,
8256+ dbstart(new_dentry),
8257+ inode);
8258+ }
8259+ }
8260+ /* if all of this renaming succeeded, update our times */
8261+ unionfs_copy_attr_times(old_dentry->d_inode);
8262+ unionfs_copy_attr_times(new_dentry->d_inode); /* a NULL inode is tolerated by unionfs_copy_attr_times() */
8263+ unionfs_check_inode(old_dir);
8264+ unionfs_check_inode(new_dir);
8265+ unionfs_check_dentry(old_dentry);
8266+ unionfs_check_dentry(new_dentry);
8267+
8268+out:
8269+ if (err) /* clear the new_dentry stuff created */
8270+ d_drop(new_dentry);
8271+
8272+ unionfs_double_unlock_dentry(old_dentry, new_dentry); /* unlock using the same conditions as the locking above */
8273+ if (new_parent != old_dentry &&
8274+ new_parent != new_dentry &&
8275+ new_parent != old_parent)
8276+ unionfs_unlock_dentry(new_parent);
8277+ if (old_parent != old_dentry &&
8278+ old_parent != new_dentry)
8279+ unionfs_unlock_dentry(old_parent);
8280+ dput(new_parent);
8281+ dput(old_parent);
8282+ unionfs_read_unlock(old_dentry->d_sb);
8283+
8284+ return err;
8285+}
8286diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8287new file mode 100644
8288index 0000000..b923742
8289--- /dev/null
8290+++ b/fs/unionfs/sioq.c
8291@@ -0,0 +1,101 @@
8292+/*
8293+ * Copyright (c) 2006-2011 Erez Zadok
8294+ * Copyright (c) 2006 Charles P. Wright
8295+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8296+ * Copyright (c) 2006 Junjiro Okajima
8297+ * Copyright (c) 2006 David P. Quigley
8298+ * Copyright (c) 2006-2011 Stony Brook University
8299+ * Copyright (c) 2006-2011 The Research Foundation of SUNY
8300+ *
8301+ * This program is free software; you can redistribute it and/or modify
8302+ * it under the terms of the GNU General Public License version 2 as
8303+ * published by the Free Software Foundation.
8304+ */
8305+
8306+#include "union.h"
8307+
8308+/*
8309+ * Super-user IO work Queue - sometimes we need to perform actions which
8310+ * would fail due to the unix permissions on the parent directory (e.g.,
8311+ * rmdir a directory which appears empty, but in reality contains
8312+ * whiteouts).
8313+ */
8314+
8315+static struct workqueue_struct *superio_workqueue;
8316+
8317+int __init init_sioq(void) /* Create the "unionfs_siod" workqueue used by run_sioq(). Returns 0 on success, -ENOMEM if the workqueue could not be created. */
8318+{
8319+ int err;
8320+
8321+ superio_workqueue = create_workqueue("unionfs_siod");
8322+ if (superio_workqueue) /* create_workqueue() reports failure with NULL, never an ERR_PTR(), so IS_ERR() cannot detect it */
8323+ return 0;
8324+
8325+ err = -ENOMEM;
8326+ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8327+
8328+ return err; /* superio_workqueue is left NULL, which stop_sioq() checks for */
8329+}
8330+
8331+void stop_sioq(void) /* Destroy the superio workqueue, if one was created. */
8332+{
8333+ if (superio_workqueue)
8334+ destroy_workqueue(superio_workqueue);
8335+}
8336+
8337+void run_sioq(work_func_t func, struct sioq_args *args) /* Queue @func on the superio workqueue with @args->work as its work item and block until @args->comp is completed. */
8338+{
8339+ INIT_WORK(&args->work, func);
8340+
8341+ init_completion(&args->comp);
8342+ while (!queue_work(superio_workqueue, &args->work)) { /* retry until queue_work() accepts the item */
8343+ /* TODO: do accounting if needed */
8344+ schedule();
8345+ }
8346+ wait_for_completion(&args->comp); /* the work functions in this file call complete() on it when done */
8347+}
8348+
8349+void __unionfs_create(struct work_struct *work) /* Work function: run vfs_create() with the arguments in args->create, store the result in args->err and signal args->comp. */
8350+{
8351+ struct sioq_args *args = container_of(work, struct sioq_args, work); /* recover the enclosing sioq_args from its embedded work item */
8352+ struct create_args *c = &args->create;
8353+
8354+ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
8355+ complete(&args->comp);
8356+}
8357+
8358+void __unionfs_mkdir(struct work_struct *work) /* Work function: run vfs_mkdir() with the arguments in args->mkdir, store the result in args->err and signal args->comp. */
8359+{
8360+ struct sioq_args *args = container_of(work, struct sioq_args, work); /* recover the enclosing sioq_args from its embedded work item */
8361+ struct mkdir_args *m = &args->mkdir;
8362+
8363+ args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
8364+ complete(&args->comp);
8365+}
8366+
8367+void __unionfs_mknod(struct work_struct *work) /* Work function: run vfs_mknod() with the arguments in args->mknod, store the result in args->err and signal args->comp. */
8368+{
8369+ struct sioq_args *args = container_of(work, struct sioq_args, work); /* recover the enclosing sioq_args from its embedded work item */
8370+ struct mknod_args *m = &args->mknod;
8371+
8372+ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
8373+ complete(&args->comp);
8374+}
8375+
8376+void __unionfs_symlink(struct work_struct *work) /* Work function: run vfs_symlink() with the arguments in args->symlink, store the result in args->err and signal args->comp. */
8377+{
8378+ struct sioq_args *args = container_of(work, struct sioq_args, work); /* recover the enclosing sioq_args from its embedded work item */
8379+ struct symlink_args *s = &args->symlink;
8380+
8381+ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf);
8382+ complete(&args->comp);
8383+}
8384+
8385+void __unionfs_unlink(struct work_struct *work) /* Work function: run vfs_unlink() with the arguments in args->unlink, store the result in args->err and signal args->comp. */
8386+{
8387+ struct sioq_args *args = container_of(work, struct sioq_args, work); /* recover the enclosing sioq_args from its embedded work item */
8388+ struct unlink_args *u = &args->unlink;
8389+
8390+ args->err = vfs_unlink(u->parent, u->dentry);
8391+ complete(&args->comp);
8392+}
8393diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8394new file mode 100644
8395index 0000000..c2dfb94
8396--- /dev/null
8397+++ b/fs/unionfs/sioq.h
8398@@ -0,0 +1,91 @@
8399+/*
8400+ * Copyright (c) 2006-2011 Erez Zadok
8401+ * Copyright (c) 2006 Charles P. Wright
8402+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8403+ * Copyright (c) 2006 Junjiro Okajima
8404+ * Copyright (c) 2006 David P. Quigley
8405+ * Copyright (c) 2006-2011 Stony Brook University
8406+ * Copyright (c) 2006-2011 The Research Foundation of SUNY
8407+ *
8408+ * This program is free software; you can redistribute it and/or modify
8409+ * it under the terms of the GNU General Public License version 2 as
8410+ * published by the Free Software Foundation.
8411+ */
8412+
8413+#ifndef _SIOQ_H
8414+#define _SIOQ_H
8415+
8416+struct deletewh_args { /* presumably the arguments for __delete_whiteouts(), which is declared below but defined elsewhere - confirm */
8417+ struct unionfs_dir_state *namelist;
8418+ struct dentry *dentry;
8419+ int bindex;
8420+};
8421+
8422+struct is_opaque_args { /* presumably the argument for __is_opaque_dir(), which is declared below but defined elsewhere - confirm */
8423+ struct dentry *dentry;
8424+};
8425+
8426+struct create_args { /* arguments that __unionfs_create() passes, in this order, to vfs_create() */
8427+ struct inode *parent;
8428+ struct dentry *dentry;
8429+ umode_t mode;
8430+ struct nameidata *nd;
8431+};
8432+
8433+struct mkdir_args { /* arguments that __unionfs_mkdir() passes, in this order, to vfs_mkdir() */
8434+ struct inode *parent;
8435+ struct dentry *dentry;
8436+ umode_t mode;
8437+};
8438+
8439+struct mknod_args { /* arguments that __unionfs_mknod() passes, in this order, to vfs_mknod() */
8440+ struct inode *parent;
8441+ struct dentry *dentry;
8442+ umode_t mode;
8443+ dev_t dev;
8444+};
8445+
8446+struct symlink_args { /* arguments that __unionfs_symlink() passes, in this order, to vfs_symlink() */
8447+ struct inode *parent;
8448+ struct dentry *dentry;
8449+ char *symbuf;
8450+};
8451+
8452+struct unlink_args { /* arguments that __unionfs_unlink() passes, in this order, to vfs_unlink() */
8453+ struct inode *parent;
8454+ struct dentry *dentry;
8455+};
8456+
8457+
8458+struct sioq_args { /* one request handed to run_sioq(): the work item, its completion, its result, and the per-operation arguments */
8459+ struct completion comp; /* initialised and waited on by run_sioq(); completed by the work function */
8460+ struct work_struct work; /* work item queued on the superio workqueue; work functions use container_of() on it */
8461+ int err; /* return value of the VFS call, stored by the work function */
8462+ void *ret; /* NOTE(review): not touched by any work function in sioq.c - confirm its users */
8463+
8464+ union { /* per-operation arguments; each work function reads only its own member */
8465+ struct deletewh_args deletewh;
8466+ struct is_opaque_args is_opaque;
8467+ struct create_args create;
8468+ struct mkdir_args mkdir;
8469+ struct mknod_args mknod;
8470+ struct symlink_args symlink;
8471+ struct unlink_args unlink;
8472+ };
8473+};
8474+
8475+/* Extern definitions for SIOQ functions */
8476+extern int __init init_sioq(void);
8477+extern void stop_sioq(void);
8478+extern void run_sioq(work_func_t func, struct sioq_args *args);
8479+
8480+/* Extern definitions for our privilege escalation helpers */
8481+extern void __unionfs_create(struct work_struct *work);
8482+extern void __unionfs_mkdir(struct work_struct *work);
8483+extern void __unionfs_mknod(struct work_struct *work);
8484+extern void __unionfs_symlink(struct work_struct *work);
8485+extern void __unionfs_unlink(struct work_struct *work);
8486+extern void __delete_whiteouts(struct work_struct *work);
8487+extern void __is_opaque_dir(struct work_struct *work);
8488+
8489+#endif /* not _SIOQ_H */
8490diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8491new file mode 100644
8492index 0000000..bdca2f7
8493--- /dev/null
8494+++ b/fs/unionfs/subr.c
8495@@ -0,0 +1,95 @@
8496+/*
8497+ * Copyright (c) 2003-2011 Erez Zadok
8498+ * Copyright (c) 2003-2006 Charles P. Wright
8499+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8500+ * Copyright (c) 2005-2006 Junjiro Okajima
8501+ * Copyright (c) 2005 Arun M. Krishnakumar
8502+ * Copyright (c) 2004-2006 David P. Quigley
8503+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8504+ * Copyright (c) 2003 Puja Gupta
8505+ * Copyright (c) 2003 Harikesavan Krishnan
8506+ * Copyright (c) 2003-2011 Stony Brook University
8507+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
8508+ *
8509+ * This program is free software; you can redistribute it and/or modify
8510+ * it under the terms of the GNU General Public License version 2 as
8511+ * published by the Free Software Foundation.
8512+ */
8513+
8514+#include "union.h"
8515+
8516+/*
8517+ * returns the right n_link value based on the inode type: 0 for an unlinked inode, the lower inode's i_nlink for non-directories, and 1 for directories
8518+ */
8519+int unionfs_get_nlinks(const struct inode *inode)
8520+{
8521+ /* don't bother to do all the work since we're unlinked */
8522+ if (inode->i_nlink == 0)
8523+ return 0;
8524+
8525+ if (!S_ISDIR(inode->i_mode))
8526+ return unionfs_lower_inode(inode)->i_nlink; /* dereferenced unchecked: a lower inode is assumed to exist here */
8527+
8528+ /*
8529+ * For directories, we return 1. The only place that could care
8530+ * about links is readdir, and there's d_type there so even that
8531+ * doesn't matter.
8532+ */
8533+ return 1;
8534+}
8535+
8536+/* copy a/m/ctime from the lower branch with the newest times; each of the three timestamps is only ever moved forward, never back */
8537+void unionfs_copy_attr_times(struct inode *upper)
8538+{
8539+ int bindex;
8540+ struct inode *lower;
8541+
8542+ if (!upper) /* a NULL inode is accepted and ignored */
8543+ return;
8544+ if (ibstart(upper) < 0) { /* negative start branch: nothing to copy from (warns in debug builds) */
8545+#ifdef CONFIG_UNION_FS_DEBUG
8546+ WARN_ON(ibstart(upper) < 0);
8547+#endif /* CONFIG_UNION_FS_DEBUG */
8548+ return;
8549+ }
8550+ for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
8551+ lower = unionfs_lower_inode_idx(upper, bindex);
8552+ if (!lower)
8553+ continue; /* not all lower dir objects may exist */
8554+ if (unlikely(timespec_compare(&upper->i_mtime, /* take the lower time only when it is strictly newer */
8555+ &lower->i_mtime) < 0))
8556+ upper->i_mtime = lower->i_mtime;
8557+ if (unlikely(timespec_compare(&upper->i_ctime,
8558+ &lower->i_ctime) < 0))
8559+ upper->i_ctime = lower->i_ctime;
8560+ if (unlikely(timespec_compare(&upper->i_atime,
8561+ &lower->i_atime) < 0))
8562+ upper->i_atime = lower->i_atime;
8563+ }
8564+}
8565+
8566+/*
8567+ * A unionfs/fanout version of fsstack_copy_attr_all. Uses
8568+ * unionfs_get_nlinks to properly calculate the number of links to a file.
8569+ * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
8570+ * important if the lower inode is a directory type)
8571+ */
8572+void unionfs_copy_attr_all(struct inode *dest,
8573+ const struct inode *src)
8574+{
8575+ dest->i_mode = src->i_mode;
8576+ dest->i_uid = src->i_uid;
8577+ dest->i_gid = src->i_gid;
8578+ dest->i_rdev = src->i_rdev;
8579+
8580+ unionfs_copy_attr_times(dest); /* times come from dest's own lower inodes, not from src */
8581+
8582+ dest->i_blkbits = src->i_blkbits;
8583+ dest->i_flags = src->i_flags;
8584+
8585+ /*
8586+ * Update the nlinks AFTER updating the above fields, because the
8587+ * get_links callback may depend on them.
8588+ */
8589+ dest->i_nlink = unionfs_get_nlinks(dest);
8590+}
8591diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8592new file mode 100644
8593index 0000000..c3ac814
8594--- /dev/null
8595+++ b/fs/unionfs/super.c
8596@@ -0,0 +1,1030 @@
8597+/*
8598+ * Copyright (c) 2003-2011 Erez Zadok
8599+ * Copyright (c) 2003-2006 Charles P. Wright
8600+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8601+ * Copyright (c) 2005-2006 Junjiro Okajima
8602+ * Copyright (c) 2005 Arun M. Krishnakumar
8603+ * Copyright (c) 2004-2006 David P. Quigley
8604+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8605+ * Copyright (c) 2003 Puja Gupta
8606+ * Copyright (c) 2003 Harikesavan Krishnan
8607+ * Copyright (c) 2003-2011 Stony Brook University
8608+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
8609+ *
8610+ * This program is free software; you can redistribute it and/or modify
8611+ * it under the terms of the GNU General Public License version 2 as
8612+ * published by the Free Software Foundation.
8613+ */
8614+
8615+#include "union.h"
8616+
8617+/*
8618+ * The inode cache is used with alloc_inode for both our inode info and the
8619+ * vfs inode.
8620+ */
8621+static struct kmem_cache *unionfs_inode_cachep;
8622+
8623+struct inode *unionfs_iget(struct super_block *sb, unsigned long ino) /* Get the unionfs inode numbered @ino on @sb, initialising it if it is new. Returns the inode or ERR_PTR(-ENOMEM). */
8624+{
8625+ int size;
8626+ struct unionfs_inode_info *info;
8627+ struct inode *inode;
8628+
8629+ inode = iget_locked(sb, ino);
8630+ if (!inode)
8631+ return ERR_PTR(-ENOMEM);
8632+ if (!(inode->i_state & I_NEW)) /* not a new inode: it needs no initialisation */
8633+ return inode;
8634+
8635+ info = UNIONFS_I(inode);
8636+ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); /* zero only the unionfs fields that precede the embedded vfs_inode */
8637+ info->bstart = -1; /* no lower inodes are attached yet */
8638+ info->bend = -1;
8639+ atomic_set(&info->generation,
8640+ atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8641+ spin_lock_init(&info->rdlock);
8642+ info->rdcount = 1;
8643+ info->hashsize = -1;
8644+ INIT_LIST_HEAD(&info->readdircache);
8645+
8646+ size = sbmax(inode->i_sb) * sizeof(struct inode *); /* sbmax() pointer slots, all zeroed by kzalloc() */
8647+ info->lower_inodes = kzalloc(size, GFP_KERNEL);
8648+ if (unlikely(!info->lower_inodes)) {
8649+ printk(KERN_CRIT "unionfs: no kernel memory when allocating "
8650+ "lower-pointer array!\n");
8651+ iget_failed(inode);
8652+ return ERR_PTR(-ENOMEM);
8653+ }
8654+
8655+ inode->i_version++;
8656+ inode->i_op = &unionfs_main_iops;
8657+ inode->i_fop = &unionfs_main_fops;
8658+
8659+ inode->i_mapping->a_ops = &unionfs_aops;
8660+
8661+ /*
8662+ * reset times so unionfs_copy_attr_all can keep our time invariants
8663+ * right (upper inode time being the max of all lower ones).
8664+ */
8665+ inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
8666+ inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
8667+ inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
8668+ unlock_new_inode(inode);
8669+ return inode;
8670+}
8671+
8672+/*
8673+ * final actions when unmounting a file system: warn about leaked branch references, drop the lower superblock references and free the unionfs sb info
8674+ *
8675+ * No need to lock rwsem.
8676+ */
8677+static void unionfs_put_super(struct super_block *sb)
8678+{
8679+ int bindex, bstart, bend;
8680+ struct unionfs_sb_info *spd;
8681+ int leaks = 0;
8682+
8683+ spd = UNIONFS_SB(sb);
8684+ if (!spd) /* no private info attached: nothing to release */
8685+ return;
8686+
8687+ bstart = sbstart(sb);
8688+ bend = sbend(sb);
8689+
8690+ /* Make sure we have no leaks of branchget/branchput. */
8691+ for (bindex = bstart; bindex <= bend; bindex++)
8692+ if (unlikely(branch_count(sb, bindex) != 0)) {
8693+ printk(KERN_CRIT
8694+ "unionfs: branch %d has %d references left!\n",
8695+ bindex, branch_count(sb, bindex));
8696+ leaks = 1;
8697+ }
8698+ WARN_ON(leaks != 0); /* leaks are reported but do not stop the teardown */
8699+
8700+ /* decrement lower super references */
8701+ for (bindex = bstart; bindex <= bend; bindex++) {
8702+ struct super_block *s;
8703+ s = unionfs_lower_super_idx(sb, bindex);
8704+ unionfs_set_lower_super_idx(sb, bindex, NULL);
8705+ atomic_dec(&s->s_active);
8706+ }
8707+
8708+ kfree(spd->dev_name);
8709+ kfree(spd->data);
8710+ kfree(spd);
8711+ sb->s_fs_info = NULL; /* spd has been freed; do not leave a dangling pointer behind */
8712+}
8713+
8714+/*
8715+ * Since people use this to answer the "How big of a file can I write?"
8716+ * question, we report the size of the highest priority branch as the size of
8717+ * the union. Returns 0, -ESTALE if @dentry fails revalidation, or the error from vfs_statfs().
8718+ */
8719+static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8720+{
8721+ int err = 0;
8722+ struct super_block *sb;
8723+ struct dentry *lower_dentry;
8724+ struct dentry *parent;
8725+ struct path lower_path;
8726+ bool valid;
8727+
8728+ sb = dentry->d_sb;
8729+
8730+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
8731+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
8732+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
8733+
8734+ valid = __unionfs_d_revalidate(dentry, parent, false);
8735+ if (unlikely(!valid)) {
8736+ err = -ESTALE;
8737+ goto out;
8738+ }
8739+ unionfs_check_dentry(dentry);
8740+
8741+ lower_dentry = unionfs_lower_dentry(sb->s_root); /* statistics always come from the root's lower dentry, whatever @dentry is */
8742+ lower_path.dentry = lower_dentry;
8743+ lower_path.mnt = unionfs_mntget(sb->s_root, 0);
8744+ err = vfs_statfs(&lower_path, buf);
8745+ mntput(lower_path.mnt); /* release the mount reference taken by unionfs_mntget() above */
8746+
8747+ /* set return buf to our f/s to avoid confusing user-level utils */
8748+ buf->f_type = UNIONFS_SUPER_MAGIC;
8749+ /*
8750+ * Our maximum file name is shorter by a few bytes because every
8751+ * file name could potentially be whited-out.
8752+ *
8753+ * XXX: this restriction goes away with ODF.
8754+ */
8755+ unionfs_set_max_namelen(&buf->f_namelen);
8756+
8757+ /*
8758+ * reset two fields to avoid confusing user-land.
8759+ * XXX: is this still necessary?
8760+ */
8761+ memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
8762+ memset(&buf->f_spare, 0, sizeof(buf->f_spare));
8763+
8764+out:
8765+ unionfs_check_dentry(dentry);
8766+ unionfs_unlock_dentry(dentry);
8767+ unionfs_unlock_parent(dentry, parent);
8768+ unionfs_read_unlock(sb);
8769+ return err;
8770+}
8771+
8772+/* handle mode changing during remount: @optarg is "branch=mode"; on success new_data[idx].branchperms of the matching branch is updated. Returns 0 or a negative errno. */
8773+static noinline_for_stack int do_remount_mode_option(
8774+ char *optarg,
8775+ int cur_branches,
8776+ struct unionfs_data *new_data,
8777+ struct path *new_lower_paths)
8778+{
8779+ int err = -EINVAL; /* returned as is when the branch name or the mode is missing */
8780+ int perms, idx;
8781+ char *modename = strchr(optarg, '='); /* points at the '=' separator, or NULL if there is none */
8782+ struct path path;
8783+
8784+ /* by now, optarg contains the branch name */
8785+ if (!*optarg) {
8786+ printk(KERN_ERR
8787+ "unionfs: no branch specified for mode change\n");
8788+ goto out;
8789+ }
8790+ if (!modename) {
8791+ printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
8792+ optarg);
8793+ goto out;
8794+ }
8795+ *modename++ = '\0'; /* split in place: optarg is now the branch path, modename the mode string */
8796+ err = parse_branch_mode(modename, &perms);
8797+ if (err) {
8798+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
8799+ modename, optarg);
8800+ goto out;
8801+ }
8802+
8803+ /*
8804+ * Find matching branch index. For now, this assumes that nothing
8805+ * has been mounted on top of this Unionfs stack. Once we have /odf
8806+ * and cache-coherency resolved, we'll address the branch-path
8807+ * uniqueness.
8808+ */
8809+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
8810+ if (err) {
8811+ printk(KERN_ERR "unionfs: error accessing "
8812+ "lower directory \"%s\" (error %d)\n",
8813+ optarg, err);
8814+ goto out;
8815+ }
8816+ for (idx = 0; idx < cur_branches; idx++) /* a branch matches when both its mount and its dentry are identical */
8817+ if (path.mnt == new_lower_paths[idx].mnt &&
8818+ path.dentry == new_lower_paths[idx].dentry)
8819+ break;
8820+ path_put(&path); /* no longer needed */
8821+ if (idx == cur_branches) {
8822+ err = -ENOENT; /* err may have been reset above */
8823+ printk(KERN_ERR "unionfs: branch \"%s\" "
8824+ "not found\n", optarg);
8825+ goto out;
8826+ }
8827+ /* check/change mode for existing branch */
8828+ /* we don't warn if perms==branchperms */
8829+ new_data[idx].branchperms = perms;
8830+ err = 0;
8831+out:
8832+ return err;
8833+}
8834+
8835+/* handle branch deletion during remount */
8836+static noinline_for_stack int do_remount_del_option(
8837+ char *optarg, int cur_branches,
8838+ struct unionfs_data *new_data,
8839+ struct path *new_lower_paths)
8840+{
8841+ int err = -EINVAL;
8842+ int idx;
8843+ struct path path;
8844+
8845+ /* optarg contains the branch name to delete */
8846+
8847+ /*
8848+ * Find matching branch index. For now, this assumes that nothing
8849+ * has been mounted on top of this Unionfs stack. Once we have /odf
8850+ * and cache-coherency resolved, we'll address the branch-path
8851+ * uniqueness.
8852+ */
8853+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
8854+ if (err) {
8855+ printk(KERN_ERR "unionfs: error accessing "
8856+ "lower directory \"%s\" (error %d)\n",
8857+ optarg, err);
8858+ goto out;
8859+ }
8860+ for (idx = 0; idx < cur_branches; idx++)
8861+ if (path.mnt == new_lower_paths[idx].mnt &&
8862+ path.dentry == new_lower_paths[idx].dentry)
8863+ break;
8864+ path_put(&path); /* no longer needed */
8865+ if (idx == cur_branches) {
8866+ printk(KERN_ERR "unionfs: branch \"%s\" "
8867+ "not found\n", optarg);
8868+ err = -ENOENT;
8869+ goto out;
8870+ }
8871+ /* check if there are any open files on the branch to be deleted */
8872+ if (atomic_read(&new_data[idx].open_files) > 0) {
8873+ err = -EBUSY;
8874+ goto out;
8875+ }
8876+
8877+ /*
8878+ * Now we have to delete the branch. First, release any handles it
8879+ * has. Then, move the remaining array indexes past "idx" in
8880+ * new_data and new_lower_paths one to the left. Finally, adjust
8881+ * cur_branches.
8882+ */
8883+ path_put(&new_lower_paths[idx]);
8884+
8885+ if (idx < cur_branches - 1) {
8886+ /* if idx==cur_branches-1, we delete last branch: easy */
8887+ memmove(&new_data[idx], &new_data[idx+1],
8888+ (cur_branches - 1 - idx) *
8889+ sizeof(struct unionfs_data));
8890+ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
8891+ (cur_branches - 1 - idx) * sizeof(struct path));
8892+ }
8893+
8894+ err = 0;
8895+out:
8896+ return err;
8897+}
8898+
8899+/* handle branch insertion during remount */
8900+static noinline_for_stack int do_remount_add_option(
8901+ char *optarg, int cur_branches,
8902+ struct unionfs_data *new_data,
8903+ struct path *new_lower_paths,
8904+ int *high_branch_id)
8905+{
8906+ int err = -EINVAL;
8907+ int perms;
8908+ int idx = 0; /* default: insert at beginning */
8909+ char *new_branch , *modename = NULL;
8910+ struct path path;
8911+
8912+ /*
8913+ * optarg can be of several forms:
8914+ *
8915+ * /bar:/foo insert /foo before /bar
8916+ * /bar:/foo=ro insert /foo in ro mode before /bar
8917+ * /foo insert /foo in the beginning (prepend)
8918+ * :/foo insert /foo at the end (append)
8919+ */
8920+ if (*optarg == ':') { /* append? */
8921+ new_branch = optarg + 1; /* skip ':' */
8922+ idx = cur_branches;
8923+ goto found_insertion_point;
8924+ }
8925+ new_branch = strchr(optarg, ':');
8926+ if (!new_branch) { /* prepend? */
8927+ new_branch = optarg;
8928+ goto found_insertion_point;
8929+ }
8930+ *new_branch++ = '\0'; /* holds path+mode of new branch */
8931+
8932+ /*
8933+ * Find matching branch index. For now, this assumes that nothing
8934+ * has been mounted on top of this Unionfs stack. Once we have /odf
8935+ * and cache-coherency resolved, we'll address the branch-path
8936+ * uniqueness.
8937+ */
8938+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
8939+ if (err) {
8940+ printk(KERN_ERR "unionfs: error accessing "
8941+ "lower directory \"%s\" (error %d)\n",
8942+ optarg, err);
8943+ goto out;
8944+ }
8945+ for (idx = 0; idx < cur_branches; idx++)
8946+ if (path.mnt == new_lower_paths[idx].mnt &&
8947+ path.dentry == new_lower_paths[idx].dentry)
8948+ break;
8949+ path_put(&path); /* no longer needed */
8950+ if (idx == cur_branches) {
8951+ printk(KERN_ERR "unionfs: branch \"%s\" "
8952+ "not found\n", optarg);
8953+ err = -ENOENT;
8954+ goto out;
8955+ }
8956+
8957+ /*
8958+ * At this point idx will hold the index where the new branch should
8959+ * be inserted before.
8960+ */
8961+found_insertion_point:
8962+ /* find the mode for the new branch */
8963+ if (new_branch)
8964+ modename = strchr(new_branch, '=');
8965+ if (modename)
8966+ *modename++ = '\0';
8967+ if (!new_branch || !*new_branch) {
8968+ printk(KERN_ERR "unionfs: null new branch\n");
8969+ err = -EINVAL;
8970+ goto out;
8971+ }
8972+ err = parse_branch_mode(modename, &perms);
8973+ if (err) {
8974+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
8975+ "branch \"%s\"\n", modename, new_branch);
8976+ goto out;
8977+ }
8978+ err = kern_path(new_branch, LOOKUP_FOLLOW, &path);
8979+ if (err) {
8980+ printk(KERN_ERR "unionfs: error accessing "
8981+ "lower directory \"%s\" (error %d)\n",
8982+ new_branch, err);
8983+ goto out;
8984+ }
8985+ /*
8986+ * It's probably safe to check_mode the new branch to insert. Note:
8987+ * we don't allow inserting branches which are unionfs's by
8988+ * themselves (check_branch returns EINVAL in that case). This is
8989+ * because this code base doesn't support stacking unionfs: the ODF
8990+ * code base supports that correctly.
8991+ */
8992+ err = check_branch(&path);
8993+ if (err) {
8994+ printk(KERN_ERR "unionfs: lower directory "
8995+ "\"%s\" is not a valid branch\n", new_branch);
8996+ path_put(&path);
8997+ goto out;
8998+ }
8999+
9000+ /*
9001+ * Now we have to insert the new branch. But first, move the bits
9002+ * to make space for the new branch, if needed. Finally, adjust
9003+ * cur_branches.
9004+ * We don't path_put(&path) here; its references move into the array.
9005+ */
9006+ if (idx < cur_branches) {
9007+ /* if idx==cur_branches, we append: easy */
9008+ memmove(&new_data[idx+1], &new_data[idx],
9009+ (cur_branches - idx) * sizeof(struct unionfs_data));
9010+ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
9011+ (cur_branches - idx) * sizeof(struct path));
9012+ }
9013+ new_lower_paths[idx].dentry = path.dentry;
9014+ new_lower_paths[idx].mnt = path.mnt;
9015+
9016+ new_data[idx].sb = path.dentry->d_sb;
9017+ atomic_set(&new_data[idx].open_files, 0);
9018+ new_data[idx].branchperms = perms;
9019+ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
9020+
9021+ err = 0;
9022+out:
9023+ return err;
9024+}
9025+
9026+
9027+/*
9028+ * Support branch management options on remount.
9029+ *
9030+ * See Documentation/filesystems/unionfs/ for details.
9031+ *
9032+ * @flags: numeric mount options
9033+ * @options: mount options string
9034+ *
9035+ * This function can rearrange a mounted union dynamically, adding and
9036+ * removing branches, including changing branch modes. Clearly this has to
9037+ * be done safely and atomically. Luckily, the VFS already calls this
9038+ * function with lock_super(sb) and lock_kernel() held, preventing
9039+ * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
9040+ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
9041+ * to purge dentries/inodes from our superblock, and also called
9042+ * fsync_super(sb) to purge any dirty pages. So we're good.
9043+ *
9044+ * XXX: however, our remount code may also need to invalidate mapped pages
9045+ * so as to force them to be re-gotten from the (newly reconfigured) lower
9046+ * branches. This has to wait for proper mmap and cache coherency support
9047+ * in the VFS.
9048+ *
9049+ */
9050+static int unionfs_remount_fs(struct super_block *sb, int *flags,
9051+ char *options)
9052+{
9053+ int err = 0;
9054+ int i;
9055+ char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
9056+ char *optname;
9057+ int cur_branches = 0; /* no. of current branches */
9058+ int new_branches = 0; /* no. of branches actually left in the end */
9059+ int add_branches; /* est. no. of branches to add */
9060+ int del_branches; /* est. no. of branches to del */
9061+ int max_branches; /* max possible no. of branches */
9062+ struct unionfs_data *new_data = NULL, *tmp_data = NULL;
9063+ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
9064+ struct inode **new_lower_inodes = NULL;
9065+ int new_high_branch_id; /* new high branch ID */
9066+ int size; /* memory allocation size, temp var */
9067+ int old_ibstart, old_ibend;
9068+
9069+ unionfs_write_lock(sb);
9070+
9071+ /*
9072+ * The VFS will take care of "ro" and "rw" flags, and we can safely
9073+ * ignore MS_SILENT, but anything else left over is an error. So we
9074+ * need to check if any other flags may have been passed (none are
9075+ * allowed/supported as of now).
9076+ */
9077+ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
9078+ printk(KERN_ERR
9079+ "unionfs: remount flags 0x%x unsupported\n", *flags);
9080+ err = -EINVAL;
9081+ goto out_error;
9082+ }
9083+
9084+ /*
9085+ * If 'options' is NULL, it's probably because the user just changed
9086+ * the union to a "ro" or "rw" and the VFS took care of it. So
9087+ * nothing to do and we're done.
9088+ */
9089+ if (!options || options[0] == '\0')
9090+ goto out_error;
9091+
9092+ /*
9093+ * Find out how many branches we will have in the end, counting
9094+ * "add" and "del" commands. Copy the "options" string because
9095+ * strsep modifies the string and we need it later.
9096+ */
9097+ tmp_to_free = kstrdup(options, GFP_KERNEL);
9098+ optionstmp = tmp_to_free;
9099+ if (unlikely(!optionstmp)) {
9100+ err = -ENOMEM;
9101+ goto out_free;
9102+ }
9103+ cur_branches = sbmax(sb); /* current no. branches */
9104+ new_branches = sbmax(sb);
9105+ del_branches = 0;
9106+ add_branches = 0;
9107+ new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9108+ while ((optname = strsep(&optionstmp, ",")) != NULL) {
9109+ char *optarg;
9110+
9111+ if (!optname || !*optname)
9112+ continue;
9113+
9114+ optarg = strchr(optname, '=');
9115+ if (optarg)
9116+ *optarg++ = '\0';
9117+
9118+ if (!strcmp("add", optname))
9119+ add_branches++;
9120+ else if (!strcmp("del", optname))
9121+ del_branches++;
9122+ }
9123+ kfree(tmp_to_free);
9124+ /* after all changes, will we have at least one branch left? */
9125+ if ((new_branches + add_branches - del_branches) < 1) {
9126+ printk(KERN_ERR
9127+ "unionfs: no branches left after remount\n");
9128+ err = -EINVAL;
9129+ goto out_free;
9130+ }
9131+
9132+ /*
9133+ * Since we haven't actually parsed all the add/del options, nor
9134+ * have we checked them for errors, we don't know for sure how many
9135+ * branches we will have after all changes have taken place. In
9136+ * fact, the total number of branches left could be less than what
9137+ * we have now. So we need to allocate space for a temporary
9138+ * placeholder that is at least as large as the maximum number of
9139+ * branches we *could* have, which is the current number plus all
9140+ * the additions. Once we're done with these temp placeholders, we
9141+ * may have to re-allocate the final size, copy over from the temp,
9142+ * and then free the temps (done near the end of this function).
9143+ */
9144+ max_branches = cur_branches + add_branches;
9145+ /* allocate space for new pointers to lower dentry */
9146+ tmp_data = kcalloc(max_branches,
9147+ sizeof(struct unionfs_data), GFP_KERNEL);
9148+ if (unlikely(!tmp_data)) {
9149+ err = -ENOMEM;
9150+ goto out_free;
9151+ }
9152+ /* allocate space for new pointers to lower paths */
9153+ tmp_lower_paths = kcalloc(max_branches,
9154+ sizeof(struct path), GFP_KERNEL);
9155+ if (unlikely(!tmp_lower_paths)) {
9156+ err = -ENOMEM;
9157+ goto out_free;
9158+ }
9159+ /* copy current info into new placeholders, incrementing refcnts */
9160+ memcpy(tmp_data, UNIONFS_SB(sb)->data,
9161+ cur_branches * sizeof(struct unionfs_data));
9162+ memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9163+ cur_branches * sizeof(struct path));
9164+ for (i = 0; i < cur_branches; i++)
9165+ path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9166+
9167+ /*******************************************************************
9168+ * For each branch command, do kern_path on the requested branch,
9169+ * and apply the change to a temp branch list. To handle errors, we
9170+ * already dup'ed the old arrays (above), and increased the refcnts
9171+ * on various f/s objects. So now we can do all the kern_path's
9172+ * and branch-management commands on the new arrays. If it fails mid
9173+ * way, we free the tmp arrays and *put all objects. If we succeed,
9174+ * then we free old arrays and *put its objects, and then replace
9175+ * the arrays with the new tmp list (we may have to re-allocate the
9176+ * memory because the temp lists could have been larger than what we
9177+ * actually needed).
9178+ *******************************************************************/
9179+
9180+ while ((optname = strsep(&options, ",")) != NULL) {
9181+ char *optarg;
9182+
9183+ if (!optname || !*optname)
9184+ continue;
9185+ /*
9186+ * At this stage optname holds a comma-delimited option, but
9187+ * without the commas. Next, we need to break the string on
9188+ * the '=' symbol to separate CMD=ARG, where ARG itself can
9189+ * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
9190+ * KEY is "/foo", and VAL is "rw".
9191+ */
9192+ optarg = strchr(optname, '=');
9193+ if (optarg)
9194+ *optarg++ = '\0';
9195+ /* incgen remount option (instead of old ioctl) */
9196+ if (!strcmp("incgen", optname)) {
9197+ err = 0;
9198+ goto out_no_change;
9199+ }
9200+
9201+ /*
9202+ * All of our options take an argument now. (Insert ones
9203+ * that don't above this check.) So at this stage optname
9204+ * contains the CMD part and optarg contains the ARG part.
9205+ */
9206+ if (!optarg || !*optarg) {
9207+ printk(KERN_ERR "unionfs: all remount options require "
9208+ "an argument (%s)\n", optname);
9209+ err = -EINVAL;
9210+ goto out_release;
9211+ }
9212+
9213+ if (!strcmp("add", optname)) {
9214+ err = do_remount_add_option(optarg, new_branches,
9215+ tmp_data,
9216+ tmp_lower_paths,
9217+ &new_high_branch_id);
9218+ if (err)
9219+ goto out_release;
9220+ new_branches++;
9221+ if (new_branches > UNIONFS_MAX_BRANCHES) {
9222+ printk(KERN_ERR "unionfs: command exceeds "
9223+ "%d branches\n", UNIONFS_MAX_BRANCHES);
9224+ err = -E2BIG;
9225+ goto out_release;
9226+ }
9227+ continue;
9228+ }
9229+ if (!strcmp("del", optname)) {
9230+ err = do_remount_del_option(optarg, new_branches,
9231+ tmp_data,
9232+ tmp_lower_paths);
9233+ if (err)
9234+ goto out_release;
9235+ new_branches--;
9236+ continue;
9237+ }
9238+ if (!strcmp("mode", optname)) {
9239+ err = do_remount_mode_option(optarg, new_branches,
9240+ tmp_data,
9241+ tmp_lower_paths);
9242+ if (err)
9243+ goto out_release;
9244+ continue;
9245+ }
9246+
9247+ /*
9248+ * When you use "mount -o remount,ro", mount(8) will
9249+ * reportedly pass the original dirs= string from
9250+ * /proc/mounts. So for now, we have to ignore dirs= and
9251+ * not consider it an error, unless we want to allow users
9252+ * to pass dirs= in remount. Note that to allow the VFS to
9253+ * actually process the ro/rw remount options, we have to
9254+ * return 0 from this function.
9255+ */
9256+ if (!strcmp("dirs", optname)) {
9257+ printk(KERN_WARNING
9258+ "unionfs: remount ignoring option \"%s\"\n",
9259+ optname);
9260+ continue;
9261+ }
9262+
9263+ err = -EINVAL;
9264+ printk(KERN_ERR
9265+ "unionfs: unrecognized option \"%s\"\n", optname);
9266+ goto out_release;
9267+ }
9268+
9269+out_no_change:
9270+
9271+ /******************************************************************
9272+ * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9273+ * see if we need to allocate a small-sized new vector, copy the
9274+ * vectors to their correct place, release the refcnt of the older
9275+ * ones, and return. Also handle invalidating any pages that will
9276+ * have to be re-read.
9277+ *******************************************************************/
9278+
9279+ if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9280+ printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9281+ "(use \"remount,ro\" to create a read-only union)\n");
9282+ err = -EINVAL;
9283+ goto out_release;
9284+ }
9285+
9286+ /* allocate lower-inode pointers first, while tmp_* are still valid */
9287+ new_lower_inodes = kcalloc(new_branches,
9288+ sizeof(struct inode *), GFP_KERNEL);
9289+ if (unlikely(!new_lower_inodes)) {
9290+ err = -ENOMEM;
9291+ goto out_release;
9292+ }
9293+
9294+ /* (re)allocate space for new pointers to lower dentry */
9295+ size = new_branches * sizeof(struct unionfs_data);
9296+ new_data = krealloc(tmp_data, size, GFP_KERNEL);
9297+ if (unlikely(!new_data)) {
9298+ err = -ENOMEM;
9299+ goto out_release;
9300+ }
9301+ tmp_data = NULL; /* krealloc consumed it: avoid double kfree */
9302+ /* allocate space for new pointers to lower paths */
9303+ size = new_branches * sizeof(struct path);
9304+ new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9305+ if (unlikely(!new_lower_paths)) {
9306+ err = -ENOMEM;
9307+ goto out_release;
9308+ }
9309+ tmp_lower_paths = NULL; /* ditto; no failure path past here */
9310+ /*
9311+ * OK, just before we actually put the new set of branches in place,
9312+ * we need to ensure that our own f/s has no dirty objects left.
9313+ * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9314+ * fsync_super(sb), taking care of dentries, inodes, and dirty
9315+ * pages. So all that's left is for us to invalidate any leftover
9316+ * (non-dirty) pages to ensure that they will be re-read from the
9317+ * new lower branches (and to support mmap).
9318+ */
9319+
9320+ /*
9321+ * Once we finish the remounting successfully, our superblock
9322+ * generation number will have increased. This will be detected by
9323+ * our dentry-revalidation code upon subsequent f/s operations
9324+ * through unionfs. The revalidation code will rebuild the union of
9325+ * lower inodes for a given unionfs inode and invalidate any pages
9326+ * of such "stale" inodes (by calling our purge_inode_data
9327+ * function). This revalidation will happen lazily and
9328+ * incrementally, as users perform operations on cached inodes. We
9329+ * would like to encourage this revalidation to happen sooner if
9330+ * possible, so we like to try to invalidate as many other pages in
9331+ * our superblock as we can. We used to call drop_pagecache_sb() or
9332+ * a variant thereof, but either method was racy (drop_caches alone
9333+ * is known to be racy). So now we let the revalidation happen on a
9334+ * per file basis in ->d_revalidate.
9335+ */
9336+
9337+ /* grab new lower super references; release old ones */
9338+ for (i = 0; i < new_branches; i++)
9339+ atomic_inc(&new_data[i].sb->s_active);
9340+ for (i = 0; i < sbmax(sb); i++)
9341+ atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active);
9342+
9343+ /* copy new vectors into their correct place */
9344+ tmp_data = UNIONFS_SB(sb)->data;
9345+ UNIONFS_SB(sb)->data = new_data;
9346+ new_data = NULL; /* so don't free good pointers below */
9347+ tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9348+ UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9349+ new_lower_paths = NULL; /* so don't free good pointers below */
9350+
9351+ /* update our unionfs_sb_info and root dentry index of last branch */
9352+ i = sbmax(sb); /* save no. of branches to release at end */
9353+ sbend(sb) = new_branches - 1;
9354+ dbend(sb->s_root) = new_branches - 1;
9355+ old_ibstart = ibstart(sb->s_root->d_inode);
9356+ old_ibend = ibend(sb->s_root->d_inode);
9357+ ibend(sb->s_root->d_inode) = new_branches - 1;
9358+ UNIONFS_D(sb->s_root)->bcount = new_branches;
9359+ new_branches = i; /* no. of branches to release below */
9360+
9361+ /*
9362+ * Update lower inodes: 3 steps
9363+ * 1. grab ref on all new lower inodes
9364+ */
9365+ for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9366+ struct dentry *lower_dentry =
9367+ unionfs_lower_dentry_idx(sb->s_root, i);
9368+ igrab(lower_dentry->d_inode);
9369+ new_lower_inodes[i] = lower_dentry->d_inode;
9370+ }
9371+ /* 2. release reference on all older lower inodes */
9372+ iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true);
9373+ /* 3. update root dentry's inode to new lower_inodes array */
9374+ UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9375+ new_lower_inodes = NULL;
9376+
9377+ /* maxbytes may have changed */
9378+ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9379+ /* update high branch ID */
9380+ sbhbid(sb) = new_high_branch_id;
9381+
9382+ /* update our sb->generation for revalidating objects */
9383+ i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9384+ atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9385+ atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9386+ if (!(*flags & MS_SILENT))
9387+ pr_info("unionfs: %s: new generation number %d\n",
9388+ UNIONFS_SB(sb)->dev_name, i);
9389+ /* finally, update the root dentry's times */
9390+ unionfs_copy_attr_times(sb->s_root->d_inode);
9391+ err = 0; /* reset to success */
9392+
9393+ /*
9394+ * The code above falls through to the next label, and releases the
9395+ * refcnts of the older ones (stored in tmp_*): if we fell through
9396+ * here, it means success. However, if we jump directly to this
9397+ * label from any error above, then an error occurred after we
9398+ * grabbed various refcnts, and so we have to release the
9399+ * temporarily constructed structures.
9400+ */
9401+out_release:
9402+ /* no need to cleanup/release anything in tmp_data */
9403+ if (tmp_lower_paths)
9404+ for (i = 0; i < new_branches; i++)
9405+ path_put(&tmp_lower_paths[i]);
9406+out_free:
9407+ kfree(tmp_lower_paths);
9408+ kfree(tmp_data);
9409+ kfree(new_lower_paths);
9410+ kfree(new_data);
9411+ kfree(new_lower_inodes);
9412+out_error:
9413+ unionfs_check_dentry(sb->s_root);
9414+ unionfs_write_unlock(sb);
9415+ return err;
9416+}
9417+
9418+/*
9419+ * Called by iput() when the inode reference count reached zero
9420+ * and the inode is not hashed anywhere. Used to clear anything
9421+ * that needs to be, before the inode is completely destroyed and put
9422+ * on the inode free list.
9423+ *
9424+ * No need to lock sb info's rwsem.
9425+ */
9426+static void unionfs_evict_inode(struct inode *inode)
9427+{
9428+ int bindex, bstart, bend;
9429+ struct inode *lower_inode;
9430+ struct list_head *pos, *n;
9431+ struct unionfs_dir_state *rdstate;
9432+
9433+ truncate_inode_pages(&inode->i_data, 0);
9434+ end_writeback(inode);
9435+
9436+ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9437+ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9438+ list_del(&rdstate->cache);
9439+ free_rdstate(rdstate);
9440+ }
9441+
9442+ /*
9443+ * Decrement a reference to a lower_inode, which was incremented
9444+ * by our read_inode when it was created initially.
9445+ */
9446+ bstart = ibstart(inode);
9447+ bend = ibend(inode);
9448+ if (bstart >= 0) {
9449+ for (bindex = bstart; bindex <= bend; bindex++) {
9450+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
9451+ if (!lower_inode)
9452+ continue;
9453+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
9454+ /* see Documentation/filesystems/unionfs/issues.txt */
9455+ lockdep_off();
9456+ iput(lower_inode);
9457+ lockdep_on();
9458+ }
9459+ }
9460+
9461+ kfree(UNIONFS_I(inode)->lower_inodes);
9462+ UNIONFS_I(inode)->lower_inodes = NULL;
9463+}
9464+
9465+static struct inode *unionfs_alloc_inode(struct super_block *sb)
9466+{
9467+ struct unionfs_inode_info *i;
9468+
9469+ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9470+ if (unlikely(!i))
9471+ return NULL;
9472+
9473+ /* memset everything up to the inode to 0 */
9474+ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9475+
9476+ i->vfs_inode.i_version = 1;
9477+ return &i->vfs_inode;
9478+}
9479+
9480+static void unionfs_destroy_inode(struct inode *inode)
9481+{
9482+ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9483+}
9484+
9485+/* unionfs inode cache constructor */
9486+static void init_once(void *obj)
9487+{
9488+ struct unionfs_inode_info *i = obj;
9489+
9490+ inode_init_once(&i->vfs_inode);
9491+}
9492+
9493+int unionfs_init_inode_cache(void)
9494+{
9495+ int err = 0;
9496+
9497+ unionfs_inode_cachep =
9498+ kmem_cache_create("unionfs_inode_cache",
9499+ sizeof(struct unionfs_inode_info), 0,
9500+ SLAB_RECLAIM_ACCOUNT, init_once);
9501+ if (unlikely(!unionfs_inode_cachep))
9502+ err = -ENOMEM;
9503+ return err;
9504+}
9505+
9506+/* unionfs inode cache destructor */
9507+void unionfs_destroy_inode_cache(void)
9508+{
9509+ if (unionfs_inode_cachep)
9510+ kmem_cache_destroy(unionfs_inode_cachep);
9511+}
9512+
9513+/*
9514+ * Called when we have a dirty inode, right here we only throw out
9515+ * parts of our readdir list that are too old.
9516+ *
9517+ * No need to grab sb info's rwsem.
9518+ */
9519+static int unionfs_write_inode(struct inode *inode,
9520+ struct writeback_control *wbc)
9521+{
9522+ struct list_head *pos, *n;
9523+ struct unionfs_dir_state *rdstate;
9524+
9525+ spin_lock(&UNIONFS_I(inode)->rdlock);
9526+ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9527+ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9528+ /* LRU order: stop at first unexpired entry (wrap-safe compare) */
9529+ if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9530+ break;
9531+ UNIONFS_I(inode)->rdcount--;
9532+ list_del(&rdstate->cache);
9533+ free_rdstate(rdstate);
9534+ }
9535+ spin_unlock(&UNIONFS_I(inode)->rdlock);
9536+
9537+ return 0;
9538+}
9539+
9540+/*
9541+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9542+ * code can actually succeed and won't leave tasks that need handling.
9543+ */
9544+static void unionfs_umount_begin(struct super_block *sb)
9545+{
9546+ struct super_block *lower_sb;
9547+ int bindex, bstart, bend;
9548+
9549+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9550+
9551+ bstart = sbstart(sb);
9552+ bend = sbend(sb);
9553+ for (bindex = bstart; bindex <= bend; bindex++) {
9554+ lower_sb = unionfs_lower_super_idx(sb, bindex);
9555+
9556+ if (lower_sb && lower_sb->s_op &&
9557+ lower_sb->s_op->umount_begin)
9558+ lower_sb->s_op->umount_begin(lower_sb);
9559+ }
9560+
9561+ unionfs_read_unlock(sb);
9562+}
9563+
9564+static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9565+{
9566+ struct super_block *sb = mnt->mnt_sb;
9567+ int ret = 0;
9568+ char *tmp_page;
9569+ char *path;
9570+ int bindex, bstart, bend;
9571+ int perms;
9572+
9573+ /* to prevent a silly lockdep warning with namespace_sem */
9574+ lockdep_off();
9575+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9576+ unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9577+
9578+ tmp_page = (char *) __get_free_page(GFP_KERNEL);
9579+ if (unlikely(!tmp_page)) {
9580+ ret = -ENOMEM;
9581+ goto out;
9582+ }
9583+
9584+ bstart = sbstart(sb);
9585+ bend = sbend(sb);
9586+
9587+ seq_printf(m, ",dirs=");
9588+ for (bindex = bstart; bindex <= bend; bindex++) {
9589+ struct path p;
9590+ p.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex);
9591+ p.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
9592+ path = d_path(&p, tmp_page, PAGE_SIZE);
9593+ if (IS_ERR(path)) {
9594+ ret = PTR_ERR(path);
9595+ goto out;
9596+ }
9597+
9598+ perms = branchperms(sb, bindex);
9599+
9600+ seq_printf(m, "%s=%s", path,
9601+ perms & MAY_WRITE ? "rw" : "ro");
9602+ if (bindex != bend)
9603+ seq_printf(m, ":");
9604+ }
9605+
9606+out:
9607+ free_page((unsigned long) tmp_page);
9608+
9609+ unionfs_unlock_dentry(sb->s_root);
9610+ unionfs_read_unlock(sb);
9611+ lockdep_on();
9612+
9613+ return ret;
9614+}
9615+
9616+struct super_operations unionfs_sops = {
9617+ .put_super = unionfs_put_super,
9618+ .statfs = unionfs_statfs,
9619+ .remount_fs = unionfs_remount_fs,
9620+ .evict_inode = unionfs_evict_inode,
9621+ .umount_begin = unionfs_umount_begin,
9622+ .show_options = unionfs_show_options,
9623+ .write_inode = unionfs_write_inode,
9624+ .alloc_inode = unionfs_alloc_inode,
9625+ .destroy_inode = unionfs_destroy_inode,
9626+};
9627diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9628new file mode 100644
9629index 0000000..1821705
9630--- /dev/null
9631+++ b/fs/unionfs/union.h
9632@@ -0,0 +1,679 @@
9633+/*
9634+ * Copyright (c) 2003-2011 Erez Zadok
9635+ * Copyright (c) 2003-2006 Charles P. Wright
9636+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9637+ * Copyright (c) 2005 Arun M. Krishnakumar
9638+ * Copyright (c) 2004-2006 David P. Quigley
9639+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9640+ * Copyright (c) 2003 Puja Gupta
9641+ * Copyright (c) 2003 Harikesavan Krishnan
9642+ * Copyright (c) 2003-2011 Stony Brook University
9643+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
9644+ *
9645+ * This program is free software; you can redistribute it and/or modify
9646+ * it under the terms of the GNU General Public License version 2 as
9647+ * published by the Free Software Foundation.
9648+ */
9649+
9650+#ifndef _UNION_H_
9651+#define _UNION_H_
9652+
9653+#include <linux/dcache.h>
9654+#include <linux/file.h>
9655+#include <linux/list.h>
9656+#include <linux/fs.h>
9657+#include <linux/mm.h>
9658+#include <linux/module.h>
9659+#include <linux/mount.h>
9660+#include <linux/namei.h>
9661+#include <linux/page-flags.h>
9662+#include <linux/pagemap.h>
9663+#include <linux/poll.h>
9664+#include <linux/security.h>
9665+#include <linux/seq_file.h>
9666+#include <linux/slab.h>
9667+#include <linux/spinlock.h>
9668+#include <linux/statfs.h>
9669+#include <linux/string.h>
9670+#include <linux/vmalloc.h>
9671+#include <linux/writeback.h>
9672+#include <linux/buffer_head.h>
9673+#include <linux/xattr.h>
9674+#include <linux/fs_stack.h>
9675+#include <linux/magic.h>
9676+#include <linux/log2.h>
9677+#include <linux/poison.h>
9678+#include <linux/mman.h>
9679+#include <linux/backing-dev.h>
9680+#include <linux/splice.h>
9681+#include <linux/sched.h>
9682+
9683+#include <asm/system.h>
9684+
9685+#include <linux/union_fs.h>
9686+
9687+/* the file system name */
9688+#define UNIONFS_NAME "unionfs"
9689+
9690+/* unionfs root inode number */
9691+#define UNIONFS_ROOT_INO 1
9692+
9693+/* number of times we try to get a unique temporary file name */
9694+#define GET_TMPNAM_MAX_RETRY 5
9695+
9696+/* maximum number of branches we support, to avoid memory blowup */
9697+#define UNIONFS_MAX_BRANCHES 128
9698+
9699+/* minimum time (seconds) required for time-based cache-coherency */
9700+#define UNIONFS_MIN_CC_TIME 3
9701+
9702+/* Operations vectors defined in specific files. */
9703+extern struct file_operations unionfs_main_fops;
9704+extern struct file_operations unionfs_dir_fops;
9705+extern struct inode_operations unionfs_main_iops;
9706+extern struct inode_operations unionfs_dir_iops;
9707+extern struct inode_operations unionfs_symlink_iops;
9708+extern struct super_operations unionfs_sops;
9709+extern struct dentry_operations unionfs_dops;
9710+extern struct address_space_operations unionfs_aops, unionfs_dummy_aops;
9711+extern struct vm_operations_struct unionfs_vm_ops;
9712+
9713+/* How long should an entry be allowed to persist */
9714+#define RDCACHE_JIFFIES (5*HZ)
9715+
9716+/* compatibility with Real-Time patches */
9717+#ifdef CONFIG_PREEMPT_RT
9718+# define unionfs_rw_semaphore compat_rw_semaphore
9719+#else /* not CONFIG_PREEMPT_RT */
9720+# define unionfs_rw_semaphore rw_semaphore
9721+#endif /* not CONFIG_PREEMPT_RT */
9722+
9723+/* file private data. */
9724+struct unionfs_file_info {
9725+ int bstart;
9726+ int bend;
9727+ atomic_t generation;
9728+
9729+ struct unionfs_dir_state *rdstate;
9730+ struct file **lower_files;
9731+ int *saved_branch_ids; /* IDs of branches when file was opened */
9732+ const struct vm_operations_struct *lower_vm_ops;
9733+ bool wrote_to_file; /* for delayed copyup */
9734+};
9735+
9736+/* unionfs inode data in memory */
9737+struct unionfs_inode_info {
9738+ int bstart;
9739+ int bend;
9740+ atomic_t generation;
9741+ /* Stuff for readdir over NFS. */
9742+ spinlock_t rdlock;
9743+ struct list_head readdircache;
9744+ int rdcount;
9745+ int hashsize;
9746+ int cookie;
9747+
9748+ /* The lower inodes */
9749+ struct inode **lower_inodes;
9750+
9751+ struct inode vfs_inode;
9752+};
9753+
9754+/* unionfs dentry data in memory */
9755+struct unionfs_dentry_info {
9756+ /*
9757+ * This mutex is used to lock the dentry as soon as we get into a
9758+ * unionfs function from the VFS. Our lock ordering is that children
9759+ * go before their parents.
9760+ */
9761+ struct mutex lock;
9762+ int bstart;
9763+ int bend;
9764+ int bopaque;
9765+ int bcount;
9766+ atomic_t generation;
9767+ struct path *lower_paths;
9768+};
9769+
9770+/* These are the pointers to our various objects. */
9771+struct unionfs_data {
9772+ struct super_block *sb; /* lower super_block */
9773+ atomic_t open_files; /* number of open files on branch */
9774+ int branchperms;
9775+ int branch_id; /* unique branch ID at re/mount time */
9776+};
9777+
9778+/* unionfs super-block data in memory */
9779+struct unionfs_sb_info {
9780+ int bend;
9781+
9782+ atomic_t generation;
9783+
9784+ /*
9785+ * This rwsem is used to make sure that a branch management
9786+ * operation...
9787+ * 1) will not begin before all currently in-flight operations
9788+ * complete.
9789+ * 2) any new operations do not execute until the currently
9790+ * running branch management operation completes.
9791+ *
9792+ * The write_lock_owner records the PID of the task which grabbed
9793+ * the rw_sem for writing. If the same task also tries to grab the
9794+ * read lock, we allow it. This prevents a self-deadlock when
9795+ * branch-management is used on a pivot_root'ed union, because we
9796+ * have to ->lookup paths which belong to the same union.
9797+ */
9798+ struct unionfs_rw_semaphore rwsem;
9799+ pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */
9800+ int high_branch_id; /* last unique branch ID given */
9801+ char *dev_name; /* to identify different unions in pr_debug */
9802+ struct unionfs_data *data;
9803+};
9804+
9805+/*
9806+ * structure for making the linked list of entries by readdir on left branch
9807+ * to compare with entries on right branch
9808+ */
9809+struct filldir_node {
9810+ struct list_head file_list; /* list for directory entries */
9811+ char *name; /* name entry */
9812+ int hash; /* name hash */
9813+ int namelen; /* name len since name is not 0 terminated */
9814+
9815+ /*
9816+ * we can check for duplicate whiteouts and files in the same branch
9817+ * in order to return -EIO.
9818+ */
9819+ int bindex;
9820+
9821+ /* is this a whiteout entry? */
9822+ int whiteout;
9823+
9824+ /* Inline name, so we don't need to separately kmalloc small ones */
9825+ char iname[DNAME_INLINE_LEN];
9826+};
9827+
9828+/* Directory hash table. */
9829+struct unionfs_dir_state {
9830+ unsigned int cookie; /* the cookie, based off of rdversion */
9831+ unsigned int offset; /* The entry we have returned. */
9832+ int bindex;
9833+ loff_t dirpos; /* offset within the lower level directory */
9834+ int size; /* How big is the hash table? */
9835+ int hashentries; /* How many entries have been inserted? */
9836+ unsigned long access;
9837+
9838+ /* This cache list is used when the inode keeps us around. */
9839+ struct list_head cache;
9840+ struct list_head list[0];
9841+};
9842+
9843+/* externs needed for fanout.h or sioq.h */
9844+extern int unionfs_get_nlinks(const struct inode *inode);
9845+extern void unionfs_copy_attr_times(struct inode *upper);
9846+extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src);
9847+
9848+/* include miscellaneous macros */
9849+#include "fanout.h"
9850+#include "sioq.h"
9851+
9852+/* externs for cache creation/deletion routines */
9853+extern void unionfs_destroy_filldir_cache(void);
9854+extern int unionfs_init_filldir_cache(void);
9855+extern int unionfs_init_inode_cache(void);
9856+extern void unionfs_destroy_inode_cache(void);
9857+extern int unionfs_init_dentry_cache(void);
9858+extern void unionfs_destroy_dentry_cache(void);
9859+
9860+/* Initialize and free readdir-specific state. */
9861+extern int init_rdstate(struct file *file);
9862+extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
9863+ int bindex);
9864+extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
9865+ loff_t fpos);
9866+extern void free_rdstate(struct unionfs_dir_state *state);
9867+extern int add_filldir_node(struct unionfs_dir_state *rdstate,
9868+ const char *name, int namelen, int bindex,
9869+ int whiteout);
9870+extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
9871+ const char *name, int namelen,
9872+ int is_whiteout);
9873+
9874+extern struct dentry **alloc_new_dentries(int objs);
9875+extern struct unionfs_data *alloc_new_data(int objs);
9876+
9877+/* We can only use 32-bits of offset for rdstate --- blech! */
9878+#define DIREOF (0xfffff)
9879+#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
9880+#define MAXRDCOOKIE (0xfff)
9881+/* Turn an rdstate into an offset. */
9882+static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
9883+{
9884+ off_t tmp;
9885+
9886+ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
9887+ | (buf->offset & DIREOF);
9888+ return tmp;
9889+}
9890+
9891+/* Macros for locking a super_block. */
9892+enum unionfs_super_lock_class {
9893+ UNIONFS_SMUTEX_NORMAL,
9894+ UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */
9895+ UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */
9896+};
9897+static inline void unionfs_read_lock(struct super_block *sb, int subclass)
9898+{
9899+ if (UNIONFS_SB(sb)->write_lock_owner &&
9900+ UNIONFS_SB(sb)->write_lock_owner == current->pid)
9901+ return;
9902+ down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
9903+}
9904+static inline void unionfs_read_unlock(struct super_block *sb)
9905+{
9906+ if (UNIONFS_SB(sb)->write_lock_owner &&
9907+ UNIONFS_SB(sb)->write_lock_owner == current->pid)
9908+ return;
9909+ up_read(&UNIONFS_SB(sb)->rwsem);
9910+}
9911+static inline void unionfs_write_lock(struct super_block *sb)
9912+{
9913+ down_write(&UNIONFS_SB(sb)->rwsem);
9914+ UNIONFS_SB(sb)->write_lock_owner = current->pid;
9915+}
9916+static inline void unionfs_write_unlock(struct super_block *sb)
9917+{
9918+ UNIONFS_SB(sb)->write_lock_owner = 0; /* clear while rwsem still held */
9919+ up_write(&UNIONFS_SB(sb)->rwsem); /* next writer may now set its PID */
9920+}
9921+
9922+static inline void unionfs_double_lock_dentry(struct dentry *d1,
9923+ struct dentry *d2)
9924+{
9925+ BUG_ON(d1 == d2);
9926+ if (d1 < d2) {
9927+ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_PARENT);
9928+ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_CHILD);
9929+ } else {
9930+ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_PARENT);
9931+ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_CHILD);
9932+ }
9933+}
9934+
9935+static inline void unionfs_double_unlock_dentry(struct dentry *d1,
9936+ struct dentry *d2)
9937+{
9938+ BUG_ON(d1 == d2);
9939+ if (d1 < d2) { /* released in the order double_lock_dentry took them */
9940+ unionfs_unlock_dentry(d1);
9941+ unionfs_unlock_dentry(d2);
9942+ } else {
9943+ unionfs_unlock_dentry(d2);
9944+ unionfs_unlock_dentry(d1);
9945+ }
9946+}
9947+
9948+static inline void unionfs_double_lock_parents(struct dentry *p1,
9949+ struct dentry *p2)
9950+{
9951+ if (p1 == p2) {
9952+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9953+ return;
9954+ }
9955+ if (p1 < p2) {
9956+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9957+ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_CHILD);
9958+ } else {
9959+ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_PARENT);
9960+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_CHILD);
9961+ }
9962+}
9963+
9964+static inline void unionfs_double_unlock_parents(struct dentry *p1,
9965+ struct dentry *p2)
9966+{
9967+ if (p1 == p2) {
9968+ unionfs_unlock_dentry(p1);
9969+ return;
9970+ }
9971+ if (p1 < p2) { /* released in the order double_lock_parents took them */
9972+ unionfs_unlock_dentry(p1);
9973+ unionfs_unlock_dentry(p2);
9974+ } else {
9975+ unionfs_unlock_dentry(p2);
9976+ unionfs_unlock_dentry(p1);
9977+ }
9978+}
9979+
9980+extern int new_dentry_private_data(struct dentry *dentry, int subclass);
9981+extern int realloc_dentry_private_data(struct dentry *dentry);
9982+extern void free_dentry_private_data(struct dentry *dentry);
9983+extern void update_bstart(struct dentry *dentry);
9984+extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
9985+extern void release_lower_nd(struct nameidata *nd, int err);
9986+
9987+/*
9988+ * EXTERNALS:
9989+ */
9990+
9991+/* replicates the directory structure up to given dentry in given branch */
9992+extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
9993+ const char *name, int bindex);
9994+
9995+/* partial lookup */
9996+extern int unionfs_partial_lookup(struct dentry *dentry,
9997+ struct dentry *parent);
9998+extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
9999+ struct dentry *parent,
10000+ int lookupmode);
10001+
10002+/* copies a file from dbstart to newbindex branch */
10003+extern int copyup_file(struct inode *dir, struct file *file, int bstart,
10004+ int newbindex, loff_t size);
10005+extern int copyup_named_file(struct inode *dir, struct file *file,
10006+ char *name, int bstart, int new_bindex,
10007+ loff_t len);
10008+/* copies a dentry from dbstart to newbindex branch */
10009+extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
10010+ int bstart, int new_bindex, const char *name,
10011+ int namelen, struct file **copyup_file, loff_t len);
10012+/* helper functions for post-copyup actions */
10013+extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
10014+extern void unionfs_postcopyup_release(struct dentry *dentry);
10015+
10016+/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
10017+extern int check_empty(struct dentry *dentry, struct dentry *parent,
10018+ struct unionfs_dir_state **namelist);
10019+/* whiteout and opaque directory helpers */
10020+extern char *alloc_whname(const char *name, int len);
10021+extern bool is_whiteout_name(char **namep, int *namelenp);
10022+extern bool is_validname(const char *name);
10023+extern struct dentry *lookup_whiteout(const char *name,
10024+ struct dentry *lower_parent);
10025+extern struct dentry *find_first_whiteout(struct dentry *dentry);
10026+extern int unlink_whiteout(struct dentry *wh_dentry);
10027+extern int check_unlink_whiteout(struct dentry *dentry,
10028+ struct dentry *lower_dentry, int bindex);
10029+extern int create_whiteout(struct dentry *dentry, int start);
10030+extern int delete_whiteouts(struct dentry *dentry, int bindex,
10031+ struct unionfs_dir_state *namelist);
10032+extern int is_opaque_dir(struct dentry *dentry, int bindex);
10033+extern int make_dir_opaque(struct dentry *dir, int bindex);
10034+extern void unionfs_set_max_namelen(long *namelen);
10035+
10036+extern void unionfs_reinterpose(struct dentry *this_dentry);
10037+extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
10038+
10039+/* Locking functions. */
10040+extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
10041+extern int unionfs_getlk(struct file *file, struct file_lock *fl);
10042+
10043+/* Common file operations. */
10044+extern int unionfs_file_revalidate(struct file *file, struct dentry *parent,
10045+ bool willwrite);
10046+extern int unionfs_open(struct inode *inode, struct file *file);
10047+extern int unionfs_file_release(struct inode *inode, struct file *file);
10048+extern int unionfs_flush(struct file *file, fl_owner_t id);
10049+extern long unionfs_ioctl(struct file *file, unsigned int cmd,
10050+ unsigned long arg);
10051+extern int unionfs_fsync(struct file *file, int datasync);
10052+extern int unionfs_fasync(int fd, struct file *file, int flag);
10053+
10054+/* Inode operations */
10055+extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino);
10056+extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
10057+ struct inode *new_dir, struct dentry *new_dentry);
10058+extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
10059+extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
10060+
10061+extern bool __unionfs_d_revalidate(struct dentry *dentry,
10062+ struct dentry *parent, bool willwrite);
10063+extern bool is_negative_lower(const struct dentry *dentry);
10064+extern bool is_newer_lower(const struct dentry *dentry);
10065+extern void purge_sb_data(struct super_block *sb);
10066+
10067+/* The values for unionfs_interpose's flag. */
10068+#define INTERPOSE_DEFAULT 0
10069+#define INTERPOSE_LOOKUP 1
10070+#define INTERPOSE_REVAL 2
10071+#define INTERPOSE_REVAL_NEG 3
10072+#define INTERPOSE_PARTIAL 4
10073+
10074+extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
10075+ struct super_block *sb, int flag);
10076+
10077+#ifdef CONFIG_UNION_FS_XATTR
10078+/* Extended attribute functions. */
10079+extern void *unionfs_xattr_alloc(size_t size, size_t limit);
10080+static inline void unionfs_xattr_kfree(const void *p)
10081+{
10082+ kfree(p);
10083+}
10084+extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
10085+ void *value, size_t size);
10086+extern int unionfs_removexattr(struct dentry *dentry, const char *name);
10087+extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
10088+ size_t size);
10089+extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10090+ const void *value, size_t size, int flags);
10091+#endif /* CONFIG_UNION_FS_XATTR */
10092+
10093+/* The root directory is unhashed, but isn't deleted. */
10094+static inline int d_deleted(struct dentry *d)
10095+{
10096+ return d_unhashed(d) && (d != d->d_sb->s_root);
10097+}
10098+
10099+/* unionfs_permission, check if we should bypass error to facilitate copyup */
10100+#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10101+
10102+/* unionfs_open, check if we need to copyup the file */
10103+#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10104+#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10105+
10106+static inline int branchperms(const struct super_block *sb, int index)
10107+{
10108+ BUG_ON(index < 0);
10109+ return UNIONFS_SB(sb)->data[index].branchperms;
10110+}
10111+
10112+static inline int set_branchperms(struct super_block *sb, int index, int perms)
10113+{
10114+ BUG_ON(index < 0);
10115+ UNIONFS_SB(sb)->data[index].branchperms = perms;
10116+ return perms;
10117+}
10118+
10119+/* check if readonly lower inode, but possibly unlinked (no inode->i_sb) */
10120+static inline int __is_rdonly(const struct inode *inode)
10121+{
10122+ /* if unlinked, can't be readonly (?) */
10123+ if (!inode->i_sb)
10124+ return 0;
10125+ return IS_RDONLY(inode);
10126+
10127+}
10128+/* Is this file on a read-only branch? */
10129+static inline int is_robranch_super(const struct super_block *sb, int index)
10130+{
10131+ int ret;
10132+
10133+ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
10134+ return ret;
10135+}
10136+
10137+/* Is this file on a read-only branch? */
10138+static inline int is_robranch_idx(const struct dentry *dentry, int index)
10139+{
10140+ struct super_block *lower_sb;
10141+
10142+ BUG_ON(index < 0);
10143+
10144+ if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
10145+ return -EROFS;
10146+
10147+ lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
10148+ BUG_ON(lower_sb == NULL);
10149+ /*
10150+ * test sb flags directly, not IS_RDONLY(lower_inode) because the
10151+ * lower_dentry could be a negative.
10152+ */
10153+ if (lower_sb->s_flags & MS_RDONLY)
10154+ return -EROFS;
10155+
10156+ return 0;
10157+}
10158+
10159+static inline int is_robranch(const struct dentry *dentry)
10160+{
10161+ int index;
10162+
10163+ index = UNIONFS_D(dentry)->bstart;
10164+ BUG_ON(index < 0);
10165+
10166+ return is_robranch_idx(dentry, index);
10167+}
10168+
10169+/*
10170+ * EXTERNALS:
10171+ */
10172+extern int check_branch(const struct path *path);
10173+extern int parse_branch_mode(const char *name, int *perms);
10174+
10175+/* locking helpers */
10176+static inline struct dentry *lock_parent(struct dentry *dentry)
10177+{
10178+ struct dentry *dir = dget_parent(dentry);
10179+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
10180+ return dir;
10181+}
10182+static inline struct dentry *lock_parent_wh(struct dentry *dentry)
10183+{
10184+ struct dentry *dir = dget_parent(dentry);
10185+
10186+ mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
10187+ return dir;
10188+}
10189+
10190+static inline void unlock_dir(struct dentry *dir)
10191+{
10192+ mutex_unlock(&dir->d_inode->i_mutex);
10193+ dput(dir);
10194+}
10195+
10196+/* lock base inode mutex before calling lookup_one_len */
10197+static inline struct dentry *lookup_lck_len(const char *name,
10198+ struct dentry *base, int len)
10199+{
10200+ struct dentry *d;
10201+ struct nameidata lower_nd;
10202+ int err;
10203+
10204+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
10205+ if (unlikely(err < 0)) {
10206+ d = ERR_PTR(err);
10207+ goto out;
10208+ }
10209+ mutex_lock(&base->d_inode->i_mutex);
10210+ d = lookup_one_len_nd(name, base, len, &lower_nd);
10211+ release_lower_nd(&lower_nd, err);
10212+ mutex_unlock(&base->d_inode->i_mutex);
10213+out:
10214+ return d;
10215+}
10216+
10217+static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10218+ int bindex)
10219+{
10220+ struct vfsmount *mnt;
10221+
10222+ BUG_ON(!dentry || bindex < 0);
10223+
10224+ mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10225+#ifdef CONFIG_UNION_FS_DEBUG
10226+ if (!mnt)
10227+ pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10228+ mnt, bindex);
10229+#endif /* CONFIG_UNION_FS_DEBUG */
10230+
10231+ return mnt;
10232+}
10233+
10234+static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10235+{
10236+ struct vfsmount *mnt;
10237+
10238+ if (!dentry && bindex < 0)
10239+ return;
10240+ BUG_ON(!dentry || bindex < 0);
10241+
10242+ mnt = unionfs_lower_mnt_idx(dentry, bindex);
10243+#ifdef CONFIG_UNION_FS_DEBUG
10244+ /*
10245+ * Directories can have NULL lower objects in between start/end, but
10246+ * NOT if at the start/end range. We cannot verify that this dentry
10247+ * is a type=DIR, because it may already be a negative dentry. But
10248+ * if dbstart is greater than dbend, we know that this couldn't have
10249+ * been a regular file: it had to have been a directory.
10250+ */
10251+ if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10252+ pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10253+#endif /* CONFIG_UNION_FS_DEBUG */
10254+ mntput(mnt);
10255+}
10256+
10257+#ifdef CONFIG_UNION_FS_DEBUG
10258+
10259+/* useful for tracking code reachability */
10260+#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
10261+
10262+#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10263+ __FILE__, __func__, __LINE__)
10264+#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10265+ __FILE__, __func__, __LINE__)
10266+#define unionfs_check_file(f) __unionfs_check_file((f), \
10267+ __FILE__, __func__, __LINE__)
10268+#define unionfs_check_nd(n) __unionfs_check_nd((n), \
10269+ __FILE__, __func__, __LINE__)
10270+#define show_branch_counts(sb) __show_branch_counts((sb), \
10271+ __FILE__, __func__, __LINE__)
10272+#define show_inode_times(i) __show_inode_times((i), \
10273+ __FILE__, __func__, __LINE__)
10274+#define show_dinode_times(d) __show_dinode_times((d), \
10275+ __FILE__, __func__, __LINE__)
10276+#define show_inode_counts(i) __show_inode_counts((i), \
10277+ __FILE__, __func__, __LINE__)
10278+
10279+extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10280+ const char *fxn, int line);
10281+extern void __unionfs_check_dentry(const struct dentry *dentry,
10282+ const char *fname, const char *fxn,
10283+ int line);
10284+extern void __unionfs_check_file(const struct file *file,
10285+ const char *fname, const char *fxn, int line);
10286+extern void __unionfs_check_nd(const struct nameidata *nd,
10287+ const char *fname, const char *fxn, int line);
10288+extern void __show_branch_counts(const struct super_block *sb,
10289+ const char *file, const char *fxn, int line);
10290+extern void __show_inode_times(const struct inode *inode,
10291+ const char *file, const char *fxn, int line);
10292+extern void __show_dinode_times(const struct dentry *dentry,
10293+ const char *file, const char *fxn, int line);
10294+extern void __show_inode_counts(const struct inode *inode,
10295+ const char *file, const char *fxn, int line);
10296+
10297+#else /* not CONFIG_UNION_FS_DEBUG */
10298+
10299+/* we leave useful hooks for these check functions throughout the code */
10300+#define unionfs_check_inode(i) do { } while (0)
10301+#define unionfs_check_dentry(d) do { } while (0)
10302+#define unionfs_check_file(f) do { } while (0)
10303+#define unionfs_check_nd(n) do { } while (0)
10304+#define show_branch_counts(sb) do { } while (0)
10305+#define show_inode_times(i) do { } while (0)
10306+#define show_dinode_times(d) do { } while (0)
10307+#define show_inode_counts(i) do { } while (0)
10308+
10309+#endif /* not CONFIG_UNION_FS_DEBUG */
10310+
10311+#endif /* not _UNION_H_ */
10312diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10313new file mode 100644
10314index 0000000..bf447bb
10315--- /dev/null
10316+++ b/fs/unionfs/unlink.c
10317@@ -0,0 +1,278 @@
10318+/*
10319+ * Copyright (c) 2003-2011 Erez Zadok
10320+ * Copyright (c) 2003-2006 Charles P. Wright
10321+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10322+ * Copyright (c) 2005-2006 Junjiro Okajima
10323+ * Copyright (c) 2005 Arun M. Krishnakumar
10324+ * Copyright (c) 2004-2006 David P. Quigley
10325+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10326+ * Copyright (c) 2003 Puja Gupta
10327+ * Copyright (c) 2003 Harikesavan Krishnan
10328+ * Copyright (c) 2003-2011 Stony Brook University
10329+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
10330+ *
10331+ * This program is free software; you can redistribute it and/or modify
10332+ * it under the terms of the GNU General Public License version 2 as
10333+ * published by the Free Software Foundation.
10334+ */
10335+
10336+#include "union.h"
10337+
10338+/*
10339+ * Helper function for Unionfs's unlink operation.
10340+ *
10341+ * The main goal of this function is to optimize the unlinking of non-dir
10342+ * objects in unionfs by deleting all possible lower inode objects from the
10343+ * underlying branches having same dentry name as the non-dir dentry on
10344+ * which this unlink operation is called. This way we delete as many lower
10345+ * inodes as possible, and save space. Whiteouts need to be created in
10346+ * branch0 only if unlinking fails on any of the lower branch other than
10347+ * branch0, or if a lower branch is marked read-only.
10348+ *
10349+ * Also, while unlinking a file, if we encounter any dir type entry in any
10350+ * intermediate branch, then we remove the directory by calling vfs_rmdir.
10351+ * The following special cases are also handled:
10352+ *
10353+ * (1) If an error occurs in branch0 during vfs_unlink, then we return
10354+ * appropriate error.
10355+ *
10356+ * (2) If we get an error during unlink in any of other lower branch other
10357+ * than branch0, then we create a whiteout in branch0.
10358+ *
10359+ * (3) If a whiteout already exists in any intermediate branch, we delete
10360+ * all possible inodes only up to that branch (this is an "opaqueness"
10361+ * as per Documentation/filesystems/unionfs/concepts.txt).
10362+ *
10363+ */
10364+static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry,
10365+ struct dentry *parent)
10366+{
10367+ struct dentry *lower_dentry;
10368+ struct dentry *lower_dir_dentry;
10369+ int bindex;
10370+ int err = 0;
10371+
10372+ err = unionfs_partial_lookup(dentry, parent);
10373+ if (err)
10374+ goto out;
10375+
10376+ /* trying to unlink all possible valid instances */
10377+ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
10378+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10379+ if (!lower_dentry || !lower_dentry->d_inode)
10380+ continue;
10381+
10382+ lower_dir_dentry = lock_parent(lower_dentry);
10383+
10384+ /* avoid destroying the lower inode if the object is in use */
10385+ dget(lower_dentry);
10386+ err = is_robranch_super(dentry->d_sb, bindex);
10387+ if (!err) {
10388+ /* see Documentation/filesystems/unionfs/issues.txt */
10389+ lockdep_off();
10390+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
10391+ err = vfs_unlink(lower_dir_dentry->d_inode,
10392+ lower_dentry);
10393+ else
10394+ err = vfs_rmdir(lower_dir_dentry->d_inode,
10395+ lower_dentry);
10396+ lockdep_on();
10397+ }
10398+
10399+ /* if lower object deletion succeeds, update inode's times */
10400+ if (!err)
10401+ unionfs_copy_attr_times(dentry->d_inode);
10402+ dput(lower_dentry);
10403+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10404+ unlock_dir(lower_dir_dentry);
10405+
10406+ if (err)
10407+ break;
10408+ }
10409+
10410+ /*
10411+ * Create the whiteout in branch 0 (highest priority) only if (a)
10412+ * there was an error in any intermediate branch other than branch 0
10413+ * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or
10414+ * mounted read-only.
10415+ */
10416+ if (err) {
10417+ if ((bindex == 0) ||
10418+ ((bindex == dbstart(dentry)) &&
10419+ (!IS_COPYUP_ERR(err))))
10420+ goto out;
10421+ else {
10422+ if (!IS_COPYUP_ERR(err))
10423+ pr_debug("unionfs: lower object deletion "
10424+ "failed in branch:%d\n", bindex);
10425+ err = create_whiteout(dentry, sbstart(dentry->d_sb));
10426+ }
10427+ }
10428+
10429+out:
10430+ if (!err)
10431+ inode_dec_link_count(dentry->d_inode);
10432+
10433+ /* We don't want to leave negative leftover dentries for revalidate. */
10434+ if (!err && (dbopaque(dentry) != -1))
10435+ update_bstart(dentry);
10436+
10437+ return err;
10438+}
10439+
10440+int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10441+{
10442+ int err = 0;
10443+ struct inode *inode = dentry->d_inode;
10444+ struct dentry *parent;
10445+ int valid;
10446+
10447+ BUG_ON(S_ISDIR(inode->i_mode));
10448+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10449+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10450+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10451+
10452+ valid = __unionfs_d_revalidate(dentry, parent, false);
10453+ if (unlikely(!valid)) {
10454+ err = -ESTALE;
10455+ goto out;
10456+ }
10457+ unionfs_check_dentry(dentry);
10458+
10459+ err = unionfs_unlink_whiteout(dir, dentry, parent);
10460+ /* call d_drop so the system "forgets" about us */
10461+ if (!err) {
10462+ unionfs_postcopyup_release(dentry);
10463+ unionfs_postcopyup_setmnt(parent);
10464+ if (inode->i_nlink == 0) /* drop lower inodes */
10465+ iput_lowers_all(inode, false);
10466+ d_drop(dentry);
10467+ /*
10468+ * if unlink/whiteout succeeded, parent dir mtime has
10469+ * changed
10470+ */
10471+ unionfs_copy_attr_times(dir);
10472+ }
10473+
10474+out:
10475+ if (!err) {
10476+ unionfs_check_dentry(dentry);
10477+ unionfs_check_inode(dir);
10478+ }
10479+ unionfs_unlock_dentry(dentry);
10480+ unionfs_unlock_parent(dentry, parent);
10481+ unionfs_read_unlock(dentry->d_sb);
10482+ return err;
10483+}
10484+
10485+static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10486+ struct unionfs_dir_state *namelist)
10487+{
10488+ int err;
10489+ struct dentry *lower_dentry;
10490+ struct dentry *lower_dir_dentry = NULL;
10491+
10492+ /* Here we need to remove whiteout entries. */
10493+ err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10494+ if (err)
10495+ goto out;
10496+
10497+ lower_dentry = unionfs_lower_dentry(dentry);
10498+
10499+ lower_dir_dentry = lock_parent(lower_dentry);
10500+
10501+ /* avoid destroying the lower inode if the file is in use */
10502+ dget(lower_dentry);
10503+ err = is_robranch(dentry);
10504+ if (!err)
10505+ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10506+ dput(lower_dentry);
10507+
10508+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10509+ /* propagate number of hard-links */
10510+ dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10511+
10512+out:
10513+ if (lower_dir_dentry)
10514+ unlock_dir(lower_dir_dentry);
10515+ return err;
10516+}
10517+
10518+int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10519+{
10520+ int err = 0;
10521+ struct unionfs_dir_state *namelist = NULL;
10522+ struct dentry *parent;
10523+ int dstart, dend;
10524+ bool valid;
10525+
10526+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10527+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10528+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10529+
10530+ valid = __unionfs_d_revalidate(dentry, parent, false);
10531+ if (unlikely(!valid)) {
10532+ err = -ESTALE;
10533+ goto out;
10534+ }
10535+ unionfs_check_dentry(dentry);
10536+
10537+ /* check if this unionfs directory is empty or not */
10538+ err = check_empty(dentry, parent, &namelist);
10539+ if (err)
10540+ goto out;
10541+
10542+ err = unionfs_rmdir_first(dir, dentry, namelist);
10543+ dstart = dbstart(dentry);
10544+ dend = dbend(dentry);
10545+ /*
10546+ * We create a whiteout for the directory if there was an error to
10547+ * rmdir the first directory entry in the union. Otherwise, we
10548+ * create a whiteout only if there is no chance that a lower
10549+ * priority branch might also have the same named directory. IOW,
10550+ * if there is not another same-named directory at a lower priority
10551+ * branch, then we don't need to create a whiteout for it.
10552+ */
10553+ if (!err) {
10554+ if (dstart < dend)
10555+ err = create_whiteout(dentry, dstart);
10556+ } else {
10557+ int new_err;
10558+
10559+ if (dstart == 0)
10560+ goto out;
10561+
10562+ /* exit if the error returned was NOT -EROFS */
10563+ if (!IS_COPYUP_ERR(err))
10564+ goto out;
10565+
10566+ new_err = create_whiteout(dentry, dstart - 1);
10567+ if (new_err != -EEXIST)
10568+ err = new_err;
10569+ }
10570+
10571+out:
10572+ /*
10573+ * Drop references to lower dentry/inode so storage space for them
10574+ * can be reclaimed. Then, call d_drop so the system "forgets"
10575+ * about us.
10576+ */
10577+ if (!err) {
10578+ iput_lowers_all(dentry->d_inode, false);
10579+ dput(unionfs_lower_dentry_idx(dentry, dstart));
10580+ unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10581+ d_drop(dentry);
10582+ /* update our lower vfsmnts, in case a copyup took place */
10583+ unionfs_postcopyup_setmnt(dentry);
10584+ unionfs_check_dentry(dentry);
10585+ unionfs_check_inode(dir);
10586+ }
10587+
10588+ if (namelist)
10589+ free_rdstate(namelist);
10590+
10591+ unionfs_unlock_dentry(dentry);
10592+ unionfs_unlock_parent(dentry, parent);
10593+ unionfs_read_unlock(dentry->d_sb);
10594+ return err;
10595+}
10596diff --git a/fs/unionfs/whiteout.c b/fs/unionfs/whiteout.c
10597new file mode 100644
10598index 0000000..582cef2
10599--- /dev/null
10600+++ b/fs/unionfs/whiteout.c
10601@@ -0,0 +1,601 @@
10602+/*
10603+ * Copyright (c) 2003-2011 Erez Zadok
10604+ * Copyright (c) 2003-2006 Charles P. Wright
10605+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10606+ * Copyright (c) 2005-2006 Junjiro Okajima
10607+ * Copyright (c) 2005 Arun M. Krishnakumar
10608+ * Copyright (c) 2004-2006 David P. Quigley
10609+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10610+ * Copyright (c) 2003 Puja Gupta
10611+ * Copyright (c) 2003 Harikesavan Krishnan
10612+ * Copyright (c) 2003-2011 Stony Brook University
10613+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
10614+ *
10615+ * This program is free software; you can redistribute it and/or modify
10616+ * it under the terms of the GNU General Public License version 2 as
10617+ * published by the Free Software Foundation.
10618+ */
10619+
10620+#include "union.h"
10621+
10622+/*
10623+ * whiteout and opaque directory helpers
10624+ */
10625+
10626+/* Name prefix that marks a lower file as a whiteout, and its length. */
10627+#define UNIONFS_WHPFX ".wh."
10628+#define UNIONFS_WHLEN 4 /* strlen(UNIONFS_WHPFX), without the NUL */
10629+/*
10630+ * If a directory contains this file, then it is opaque. The full marker name
10631+ * starts with the .wh. prefix so that it is blocked by lookup.
10632+ */
10633+#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10634+#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME /* ".wh.__dir_opaque" */
10635+
10636+/* build ".wh." + @name in a fresh buffer; kfree() the result; ERR_PTR on OOM */
10637+char *alloc_whname(const char *name, int len)
10638+{
10639+ const int whsize = UNIONFS_WHLEN + len + 1; /* prefix + name + NUL */
10640+ char *whname = kmalloc(whsize, GFP_KERNEL);
10641+
10642+ if (unlikely(!whname))
10643+ return ERR_PTR(-ENOMEM);
10644+
10645+ strcpy(whname, UNIONFS_WHPFX);
10646+ strlcat(whname, name, whsize);
10647+
10648+ return whname;
10649+}
10650+
10651+/*
10652+ * Shrink *@namelen by the length of the whiteout prefix. XXX: could be an
10653+ * inline or a CPP macro; it lives here to keep whiteout code in one place.
10654+ */
10655+void unionfs_set_max_namelen(long *namelen)
10656+{
10657+ *namelen = *namelen - UNIONFS_WHLEN;
10658+}
10659+
10660+/* if @namep starts with ".wh." and has more after it, strip the prefix in place */
10661+bool is_whiteout_name(char **namep, int *namelenp)
10662+{
10663+ if (*namelenp <= UNIONFS_WHLEN) /* too short to carry a name after the prefix */
10664+ return false;
10665+ if (strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN) != 0)
10666+ return false;
10667+ *namelenp -= UNIONFS_WHLEN;
10668+ *namep += UNIONFS_WHLEN;
10669+ return true;
10670+}
10671+
10672+/* a name is valid unless it begins with ".wh." or with "__dir_opaque" */
10673+bool is_validname(const char *name)
10674+{
10675+ const size_t opqlen = sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1;
10676+
10677+ if (strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN) == 0)
10678+ return false;
10679+ /* note: compared against the bare marker name, without ".wh." */
10680+ return strncmp(name, UNIONFS_DIR_OPAQUE_NAME, opqlen) != 0;
10681+}
10682+
10683+/*
10684+ * Look for a whiteout @name in @lower_parent directory. If error, return
10685+ * ERR_PTR. Caller must dput() the returned dentry if not an error. The
10686+ * returned dentry may be negative, meaning no whiteout exists there.
10687+ * XXX: some callers can reuse the whname allocated buffer to avoid repeated
10688+ * free then re-malloc calls. Need to provide a different API for those
10689+ * callers.
10690+ */
10691+struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent)
10692+{
10693+ char *whname = NULL;
10694+ int err = 0, namelen;
10695+ struct dentry *wh_dentry = NULL;
10696+
10697+ namelen = strlen(name);
10698+ whname = alloc_whname(name, namelen);
10699+ if (unlikely(IS_ERR(whname))) {
10700+ err = PTR_ERR(whname);
10701+ return ERR_PTR(err); /* not "goto out": kfree() must not see an ERR_PTR */
10702+ }
10703+
10704+ /* check if whiteout exists in this branch: lookup .wh.foo */
10705+ wh_dentry = lookup_lck_len(whname, lower_parent, strlen(whname));
10706+ if (IS_ERR(wh_dentry)) {
10707+ err = PTR_ERR(wh_dentry);
10708+ goto out;
10709+ }
10710+
10711+ /* check if negative dentry (ENOENT) */
10712+ if (!wh_dentry->d_inode)
10713+ goto out;
10714+
10715+ /* whiteout found: check if valid type */
10716+ if (!S_ISREG(wh_dentry->d_inode->i_mode)) {
10717+ printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n",
10718+ whname, wh_dentry->d_inode->i_mode);
10719+ dput(wh_dentry);
10720+ err = -EIO;
10721+ goto out;
10722+ }
10723+
10724+out:
10725+ kfree(whname);
10726+ if (err)
10727+ wh_dentry = ERR_PTR(err);
10728+ return wh_dentry;
10729+}
10730+
10731+/* scan the parent's branches left to right for a positive ".wh.<name>" */
10732+struct dentry *find_first_whiteout(struct dentry *dentry)
10733+{
10734+ struct dentry *found = ERR_PTR(-ENOENT);
10735+ struct dentry *parent = dget_parent(dentry);
10736+ int first = dbstart(parent);
10737+ int last = dbend(parent);
10738+ int idx;
10739+
10740+ for (idx = first; idx <= last; idx++) {
10741+ struct dentry *lower_parent;
10742+
10743+ lower_parent = unionfs_lower_dentry_idx(parent, idx);
10744+ if (!lower_parent)
10745+ continue; /* parent has no lower dentry in this branch */
10746+ found = lookup_whiteout(dentry->d_name.name, lower_parent);
10747+ if (IS_ERR(found))
10748+ continue; /* the error is kept unless a later branch replaces it */
10749+ if (found->d_inode)
10750+ break; /* positive whiteout: returned with its reference held */
10751+ /* negative dentry: release it and fall back to ENOENT */
10752+ dput(found);
10753+ found = ERR_PTR(-ENOENT);
10754+ }
10755+
10756+ dput(parent);
10757+
10758+ return found;
10759+}
10760+
10761+/*
10762+ * Unlink a whiteout dentry. Returns 0 or -errno. Caller must hold and
10763+ * release dentry reference.
10764+ */
10765+int unlink_whiteout(struct dentry *wh_dentry)
10766+{
10767+ int err;
10768+ struct dentry *lower_dir_dentry;
10769+
10770+ /* dget and lock parent dentry */
10771+ lower_dir_dentry = lock_parent_wh(wh_dentry);
10772+
10773+ /* see Documentation/filesystems/unionfs/issues.txt */
10774+ lockdep_off();
10775+ err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
10776+ lockdep_on();
10777+ /* the parent stays locked: the direct ->unlink() fallback below needs it */
10778+
10779+ /*
10780+ * Whiteouts are special files and should be deleted no matter what
10781+ * (as if they never existed), in order to allow this create
10782+ * operation to succeed. This is especially important in sticky
10783+ * directories: a whiteout may have been created by one user, but
10784+ * the newly created file may be created by another user.
10785+ * Therefore, in order to maintain Unix semantics, if the vfs_unlink
10786+ * above failed, then we have to try to directly unlink the
10787+ * whiteout. Note: in the ODF version of unionfs, whiteout are
10788+ * handled much more cleanly.
10789+ */
10790+ if (err == -EPERM) {
10791+ struct inode *inode = lower_dir_dentry->d_inode;
10792+ err = inode->i_op->unlink(inode, wh_dentry);
10793+ }
10794+ unlock_dir(lower_dir_dentry);
10795+ if (err)
10796+ printk(KERN_ERR "unionfs: could not unlink whiteout %s, "
10797+ "err = %d\n", wh_dentry->d_name.name, err);
10798+
10799+ return err;
10800+}
10801+
10802+/*
10803+ * Helper function when creating new objects (create, symlink, mknod, etc.).
10804+ * Checks to see if there's a whiteout in @lower_dentry's parent directory,
10805+ * whose name is taken from @dentry. Then tries to remove that whiteout, if
10806+ * found. If <dentry,bindex> is a branch marked readonly, return -EROFS.
10807+ * If it finds both a regular file and a whiteout, delete whiteout (this
10808+ * should never happen).
10809+ *
10810+ * Return 0 if no whiteout was found. Return 1 if one was found and
10811+ * successfully removed. Therefore a value >= 0 tells the caller that
10812+ * @lower_dentry belongs to a good branch to create the new object in).
10813+ * Return -ERRNO if an error occurred during whiteout lookup or in trying to
10814+ * unlink the whiteout.
10815+ */
10816+int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry,
10817+ int bindex)
10818+{
10819+ int err;
10820+ struct dentry *wh_dentry = NULL;
10821+ struct dentry *lower_dir_dentry = NULL;
10822+
10823+ /* look for whiteout dentry first */
10824+ lower_dir_dentry = dget_parent(lower_dentry);
10825+ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry);
10826+ if (IS_ERR(wh_dentry)) {
10827+ err = PTR_ERR(wh_dentry);
10828+ goto out;
10829+ }
10830+
10831+ if (!wh_dentry->d_inode) { /* no whiteout exists*/
10832+ err = 0;
10833+ goto out_dput;
10834+ }
10835+
10836+ /* check if regular file and whiteout were both found */
10837+ if (unlikely(lower_dentry->d_inode))
10838+ printk(KERN_WARNING "unionfs: removing whiteout; regular "
10839+ "file exists in directory %s (branch %d)\n",
10840+ lower_dir_dentry->d_name.name, bindex);
10841+
10842+ /* check if branch is writeable */
10843+ err = is_robranch_super(dentry->d_sb, bindex);
10844+ if (err)
10845+ goto out_dput;
10846+
10847+ /* .wh.foo has been found, so let's unlink it */
10848+ err = unlink_whiteout(wh_dentry);
10849+ if (!err)
10850+ err = 1; /* a whiteout was found and successfully removed */
10851+out_dput:
10852+ dput(wh_dentry);
10853+out:
10854+ dput(lower_dir_dentry); /* held until here: its name is printed above */
10855+ return err;
10856+}
10857+
10858+/*
10859+ * Pass an unionfs dentry and an index. It will try to create a whiteout
10860+ * for the filename in dentry, and will try in branch 'index'. On error,
10861+ * it will proceed to a branch to the left. Returns 0 or -errno.
10862+ */
10863+int create_whiteout(struct dentry *dentry, int start)
10864+{
10865+ int bstart, bend, bindex;
10866+ struct dentry *lower_dir_dentry;
10867+ struct dentry *lower_dentry;
10868+ struct dentry *lower_wh_dentry;
10869+ struct nameidata nd;
10870+ char *name = NULL;
10871+ int err = -EINVAL;
10872+
10873+ verify_locked(dentry);
10874+
10875+ bstart = dbstart(dentry);
10876+ bend = dbend(dentry);
10877+
10878+ /* create dentry's whiteout equivalent */
10879+ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
10880+ if (unlikely(IS_ERR(name))) {
10881+ err = PTR_ERR(name);
10882+ return err; /* not "goto out": kfree() must not see an ERR_PTR */
10883+ }
10884+
10885+ for (bindex = start; bindex >= 0; bindex--) {
10886+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10887+
10888+ if (!lower_dentry) {
10889+ /*
10890+ * if lower dentry is not present, create the entire lower dentry
10891+ * directory structure and go ahead. Since we want to just create whiteout,
10892+ * we only want the parent dentry, and hence get rid of this dentry.
10893+ */
10894+ lower_dentry = create_parents(dentry->d_inode,
10895+ dentry,
10896+ dentry->d_name.name,
10897+ bindex);
10898+ if (!lower_dentry || IS_ERR(lower_dentry)) {
10899+ int ret = PTR_ERR(lower_dentry);
10900+ if (!IS_COPYUP_ERR(ret))
10901+ printk(KERN_ERR
10902+ "unionfs: create_parents for "
10903+ "whiteout failed: bindex=%d "
10904+ "err=%d\n", bindex, ret);
10905+ continue;
10906+ }
10907+ }
10908+
10909+ lower_wh_dentry =
10910+ lookup_lck_len(name, lower_dentry->d_parent,
10911+ dentry->d_name.len + UNIONFS_WHLEN);
10912+ if (IS_ERR(lower_wh_dentry))
10913+ continue;
10914+
10915+ /*
10916+ * The whiteout already exists. This used to be impossible,
10917+ * but now is possible because of opaqueness.
10918+ */
10919+ if (lower_wh_dentry->d_inode) {
10920+ dput(lower_wh_dentry);
10921+ err = 0;
10922+ goto out;
10923+ }
10924+
10925+ err = init_lower_nd(&nd, LOOKUP_CREATE);
10926+ if (unlikely(err < 0)) {
10927+ dput(lower_wh_dentry); /* drop the lookup reference before bailing out */
10928+ goto out;
10929+ }
10930+ lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
10931+ err = is_robranch_super(dentry->d_sb, bindex);
10932+ if (!err)
10933+ err = vfs_create(lower_dir_dentry->d_inode,
10934+ lower_wh_dentry,
10935+ current_umask() & S_IRUGO,
10936+ &nd);
10937+ unlock_dir(lower_dir_dentry);
10938+ dput(lower_wh_dentry);
10939+ release_lower_nd(&nd, err);
10940+
10941+ if (!err || !IS_COPYUP_ERR(err))
10942+ break;
10943+ }
10944+
10945+ /* set dbopaque so that lookup will not proceed after this branch */
10946+ if (!err)
10947+ dbopaque(dentry) = bindex;
10948+
10949+out:
10950+ kfree(name);
10951+ return err;
10952+}
10953+
10954+/*
10955+ * Delete all of the whiteouts in a given directory for rmdir: each @namelist
10956+ * entry that is a whiteout recorded for branch @bindex. Returns 0 or -errno.
10957+ * lower directory inode should be locked
10958+ */
10959+static int do_delete_whiteouts(struct dentry *dentry, int bindex,
10960+ struct unionfs_dir_state *namelist)
10961+{
10962+ int err = 0;
10963+ struct dentry *lower_dir_dentry = NULL;
10964+ struct dentry *lower_dentry;
10965+ char *name = NULL, *p; /* name: ".wh." + entry name; p: where the entry name goes */
10966+ struct inode *lower_dir;
10967+ int i;
10968+ struct list_head *pos;
10969+ struct filldir_node *cursor;
10970+
10971+ /* Find out lower parent dentry */
10972+ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10973+ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
10974+ lower_dir = lower_dir_dentry->d_inode;
10975+ BUG_ON(!S_ISDIR(lower_dir->i_mode));
10976+ /* scratch buffer for whiteout names, pre-filled with the prefix */
10977+ err = -ENOMEM;
10978+ name = __getname();
10979+ if (unlikely(!name))
10980+ goto out;
10981+ strcpy(name, UNIONFS_WHPFX);
10982+ p = name + UNIONFS_WHLEN;
10983+ /* walk every list of @namelist; the outer loop stops once err is set */
10984+ err = 0;
10985+ for (i = 0; !err && i < namelist->size; i++) {
10986+ list_for_each(pos, &namelist->list[i]) {
10987+ cursor =
10988+ list_entry(pos, struct filldir_node,
10989+ file_list);
10990+ /* Only operate on whiteouts in this branch. */
10991+ if (cursor->bindex != bindex)
10992+ continue;
10993+ if (!cursor->whiteout)
10994+ continue;
10995+ /* overwrite the previous entry name right after the prefix */
10996+ strlcpy(p, cursor->name, PATH_MAX - UNIONFS_WHLEN);
10997+ lower_dentry =
10998+ lookup_lck_len(name, lower_dir_dentry,
10999+ cursor->namelen +
11000+ UNIONFS_WHLEN);
11001+ if (IS_ERR(lower_dentry)) {
11002+ err = PTR_ERR(lower_dentry);
11003+ break; /* leaves this list only; !err ends the outer loop */
11004+ }
11005+ if (lower_dentry->d_inode) /* unlink only if the whiteout is still there */
11006+ err = vfs_unlink(lower_dir, lower_dentry);
11007+ dput(lower_dentry);
11008+ if (err)
11009+ break;
11010+ }
11011+ }
11012+
11013+ __putname(name);
11014+
11015+ /* After all of the removals, we should copy the attributes once. */
11016+ fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
11017+
11018+out:
11019+ return err;
11020+}
11021+
11022+/* sioq work callback: run do_delete_whiteouts() on the packed args, then signal */
11023+void __delete_whiteouts(struct work_struct *work)
11024+{
11025+ struct sioq_args *sioq = container_of(work, struct sioq_args, work);
11026+
11027+ sioq->err = do_delete_whiteouts(sioq->deletewh.dentry,
11028+ sioq->deletewh.bindex, sioq->deletewh.namelist);
11029+ complete(&sioq->comp);
11030+}
11031+
11032+/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary; returns 0 or -errno */
11033+int delete_whiteouts(struct dentry *dentry, int bindex,
11034+ struct unionfs_dir_state *namelist)
11035+{
11036+ int err;
11037+ struct super_block *sb;
11038+ struct dentry *lower_dir_dentry;
11039+ struct inode *lower_dir;
11040+ struct sioq_args args;
11041+
11042+ sb = dentry->d_sb;
11043+ /* sanity: @dentry is a directory and @bindex lies within its branch range */
11044+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
11045+ BUG_ON(bindex < dbstart(dentry));
11046+ BUG_ON(bindex > dbend(dentry));
11047+ err = is_robranch_super(sb, bindex); /* non-zero: branch is not writeable */
11048+ if (err)
11049+ goto out;
11050+
11051+ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
11052+ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
11053+ lower_dir = lower_dir_dentry->d_inode;
11054+ BUG_ON(!S_ISDIR(lower_dir->i_mode));
11055+ /* inode_permission() == 0: delete directly; otherwise hand the job to sioq */
11056+ if (!inode_permission(lower_dir, MAY_WRITE | MAY_EXEC)) {
11057+ err = do_delete_whiteouts(dentry, bindex, namelist);
11058+ } else {
11059+ args.deletewh.namelist = namelist;
11060+ args.deletewh.dentry = dentry;
11061+ args.deletewh.bindex = bindex;
11062+ run_sioq(__delete_whiteouts, &args); /* presumably blocks until the worker completes - confirm in sioq.c */
11063+ err = args.err; /* result stored by __delete_whiteouts() */
11064+ }
11065+
11066+out:
11067+ return err;
11068+}
11069+
11070+/****************************************************************************
11071+ * Opaque directory helpers *
11072+ ****************************************************************************/
11073+
11074+/*
11075+ * is_opaque_dir: returns 0 if it is NOT an opaque dir, 1 if it is, and
11076+ * -errno if an error occurred trying to figure this out.
11077+ */
11078+int is_opaque_dir(struct dentry *dentry, int bindex)
11079+{
11080+ int err = 0;
11081+ struct dentry *lower_dentry;
11082+ struct dentry *wh_lower_dentry; /* dentry of the .wh.__dir_opaque marker */
11083+ struct inode *lower_inode;
11084+ struct sioq_args args;
11085+ struct nameidata lower_nd;
11086+
11087+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
11088+ lower_inode = lower_dentry->d_inode;
11089+
11090+ BUG_ON(!S_ISDIR(lower_inode->i_mode));
11091+ /* the marker lookup below runs with the lower directory's i_mutex held */
11092+ mutex_lock(&lower_inode->i_mutex);
11093+ /* inode_permission() == 0: look up directly; otherwise go through sioq */
11094+ if (!inode_permission(lower_inode, MAY_EXEC)) {
11095+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
11096+ if (unlikely(err < 0)) {
11097+ mutex_unlock(&lower_inode->i_mutex);
11098+ goto out;
11099+ }
11100+ wh_lower_dentry =
11101+ lookup_one_len_nd(UNIONFS_DIR_OPAQUE, lower_dentry,
11102+ sizeof(UNIONFS_DIR_OPAQUE) - 1,
11103+ &lower_nd);
11104+ release_lower_nd(&lower_nd, err);
11105+ } else {
11106+ args.is_opaque.dentry = lower_dentry;
11107+ run_sioq(__is_opaque_dir, &args);
11108+ wh_lower_dentry = args.ret; /* NOTE(review): valid only if the worker sets ->ret on every path - confirm */
11109+ }
11110+
11111+ mutex_unlock(&lower_inode->i_mutex);
11112+
11113+ if (IS_ERR(wh_lower_dentry)) {
11114+ err = PTR_ERR(wh_lower_dentry);
11115+ goto out;
11116+ }
11117+
11118+ /* This is an opaque dir iff wh_lower_dentry is positive */
11119+ err = !!wh_lower_dentry->d_inode;
11120+
11121+ dput(wh_lower_dentry);
11122+out:
11123+ return err;
11124+}
11125+/* sioq work callback for is_opaque_dir(): the marker dentry or an ERR_PTR goes in args->ret */
11126+void __is_opaque_dir(struct work_struct *work)
11127+{
11128+ struct sioq_args *args = container_of(work, struct sioq_args, work);
11129+ struct nameidata lower_nd;
11130+ int err;
11131+
11132+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
11133+ if (unlikely(err < 0)) {
11134+ args->ret = ERR_PTR(err); /* never leave ->ret unset for the caller */
11135+ } else {
11136+ args->ret = lookup_one_len_nd(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry,
11137+ sizeof(UNIONFS_DIR_OPAQUE) - 1, &lower_nd);
11138+ release_lower_nd(&lower_nd, err);
11139+ }
11140+ complete(&args->comp); /* always signal completion, even on failure */
11141+}
11142+/* make branch @bindex of directory @dentry opaque by creating its .wh.__dir_opaque marker; returns 0 or -errno */
11143+int make_dir_opaque(struct dentry *dentry, int bindex)
11144+{
11145+ int err = 0;
11146+ struct dentry *lower_dentry, *diropq;
11147+ struct inode *lower_dir;
11148+ struct nameidata nd;
11149+ const struct cred *old_creds;
11150+ struct cred *new_creds;
11151+
11152+ /*
11153+ * Opaque directory whiteout markers are special files (like regular
11154+ * whiteouts), and should appear to the users as if they don't
11155+ * exist. They should be created/deleted regardless of directory
11156+ * search/create permissions, but only for the duration of this
11157+ * creation of the .wh.__dir_opaque: file. Note, this does not
11158+ * circumvent normal ->permission).
11159+ */
11160+ new_creds = prepare_creds();
11161+ if (unlikely(!new_creds)) {
11162+ err = -ENOMEM;
11163+ goto out_err;
11164+ }
11165+ cap_raise(new_creds->cap_effective, CAP_DAC_READ_SEARCH);
11166+ cap_raise(new_creds->cap_effective, CAP_DAC_OVERRIDE);
11167+ old_creds = override_creds(new_creds);
11168+
11169+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
11170+ lower_dir = lower_dentry->d_inode;
11171+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
11172+ !S_ISDIR(lower_dir->i_mode));
11173+
11174+ mutex_lock(&lower_dir->i_mutex);
11175+ err = init_lower_nd(&nd, LOOKUP_OPEN);
11176+ if (unlikely(err < 0))
11177+ goto out;
11178+ diropq = lookup_one_len_nd(UNIONFS_DIR_OPAQUE, lower_dentry,
11179+ sizeof(UNIONFS_DIR_OPAQUE) - 1, &nd);
11180+ release_lower_nd(&nd, err);
11181+ if (IS_ERR(diropq)) {
11182+ err = PTR_ERR(diropq);
11183+ goto out;
11184+ }
11185+
11186+ err = init_lower_nd(&nd, LOOKUP_CREATE);
11187+ if (unlikely(err < 0))
11188+ goto out_dput; /* diropq holds a reference that must be dropped */
11189+ if (!diropq->d_inode)
11190+ err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
11191+ if (!err)
11192+ dbopaque(dentry) = bindex;
11193+ release_lower_nd(&nd, err);
11194+out_dput:
11195+ dput(diropq);
11196+out:
11197+ mutex_unlock(&lower_dir->i_mutex);
11198+ revert_creds(old_creds);
11199+ put_cred(new_creds); /* drop the reference obtained from prepare_creds() */
11200+out_err:
11201+ return err;
11202+}
11203diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
11204new file mode 100644
11205index 0000000..a93d803
11206--- /dev/null
11207+++ b/fs/unionfs/xattr.c
11208@@ -0,0 +1,173 @@
11209+/*
11210+ * Copyright (c) 2003-2011 Erez Zadok
11211+ * Copyright (c) 2003-2006 Charles P. Wright
11212+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11213+ * Copyright (c) 2005-2006 Junjiro Okajima
11214+ * Copyright (c) 2005 Arun M. Krishnakumar
11215+ * Copyright (c) 2004-2006 David P. Quigley
11216+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
11217+ * Copyright (c) 2003 Puja Gupta
11218+ * Copyright (c) 2003 Harikesavan Krishnan
11219+ * Copyright (c) 2003-2011 Stony Brook University
11220+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
11221+ *
11222+ * This program is free software; you can redistribute it and/or modify
11223+ * it under the terms of the GNU General Public License version 2 as
11224+ * published by the Free Software Foundation.
11225+ */
11226+
11227+#include "union.h"
11228+
11229+/* Lifted from fs/xattr.c: allocate an xattr buffer of @size, capped at @limit */
11230+void *unionfs_xattr_alloc(size_t size, size_t limit)
11231+{
11232+ void *buf;
11233+
11234+ if (size > limit) /* request exceeds the caller's cap */
11235+ return ERR_PTR(-E2BIG);
11236+ if (size == 0) /* size request, no buffer is needed */
11237+ return NULL;
11238+
11239+ buf = kmalloc(size, GFP_KERNEL);
11240+ if (unlikely(buf == NULL))
11241+ buf = ERR_PTR(-ENOMEM);
11242+
11243+ return buf;
11244+}
11245+
11246+/*
11247+ * Read xattr @name from the lower dentry. Caller context as noted by the
11248+ * original authors: BKL held, dentry->d_inode->i_mutex locked.
11249+ */
11250+ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
11251+ size_t size)
11252+{
11253+ struct dentry *parent;
11254+ int err;
11255+
11256+ /* take the superblock read lock, then the parent, then this dentry */
11257+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11258+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11259+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11260+
11261+ /* a dentry that fails revalidation is reported as stale */
11262+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
11263+ err = -ESTALE;
11264+ } else {
11265+ struct dentry *lower = unionfs_lower_dentry(dentry);
11266+
11267+ /* hand the request to the lower dentry unchanged */
11268+ err = vfs_getxattr(lower, (char *) name, value, size);
11269+ }
11270+
11271+ /* check, then release the locks in reverse order of acquisition */
11272+ unionfs_check_dentry(dentry);
11273+ unionfs_unlock_dentry(dentry);
11274+ unionfs_unlock_parent(dentry, parent);
11275+ unionfs_read_unlock(dentry->d_sb);
11276+
11277+ return err;
11278+}
11279+
11280+/*
11281+ * Set xattr @name on the lower dentry. Caller context as noted by the
11282+ * original authors: BKL held, dentry->d_inode->i_mutex locked.
11283+ */
11284+int unionfs_setxattr(struct dentry *dentry, const char *name,
11285+ const void *value, size_t size, int flags)
11286+{
11287+ struct dentry *parent;
11288+ int err;
11289+
11290+ /* take the superblock read lock, then the parent, then this dentry */
11291+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11292+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11293+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11294+
11295+ /* a dentry that fails revalidation is reported as stale */
11296+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
11297+ err = -ESTALE;
11298+ } else {
11299+ struct dentry *lower = unionfs_lower_dentry(dentry);
11300+
11301+ /* hand the request to the lower dentry unchanged */
11302+ err = vfs_setxattr(lower, (char *) name, (void *) value,
11303+ size, flags);
11304+ }
11305+
11306+ /* check, then release the locks in reverse order of acquisition */
11307+ unionfs_check_dentry(dentry);
11308+ unionfs_unlock_dentry(dentry);
11309+ unionfs_unlock_parent(dentry, parent);
11310+ unionfs_read_unlock(dentry->d_sb);
11311+
11312+ return err;
11313+}
11314+
11315+/*
11316+ * Remove xattr @name from the lower dentry. Caller context as noted by the
11317+ * original authors: BKL held, dentry->d_inode->i_mutex locked.
11318+ */
11319+int unionfs_removexattr(struct dentry *dentry, const char *name)
11320+{
11321+ struct dentry *parent;
11322+ int err;
11323+
11324+ /* take the superblock read lock, then the parent, then this dentry */
11325+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11326+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11327+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11328+
11329+ /* a dentry that fails revalidation is reported as stale */
11330+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
11331+ err = -ESTALE;
11332+ } else {
11333+ struct dentry *lower = unionfs_lower_dentry(dentry);
11334+
11335+ /* hand the request to the lower dentry unchanged */
11336+ err = vfs_removexattr(lower, (char *) name);
11337+ }
11338+
11339+ /* check, then release the locks in reverse order of acquisition */
11340+ unionfs_check_dentry(dentry);
11341+ unionfs_unlock_dentry(dentry);
11342+ unionfs_unlock_parent(dentry, parent);
11343+ unionfs_read_unlock(dentry->d_sb);
11344+
11345+ return err;
11346+}
11347+
11348+/*
11349+ * List the xattr names of the lower dentry into @list (at most @size
11350+ * bytes); @list is passed to vfs_listxattr() as is.
11351+ * Caller context as noted by the original authors: BKL held,
11352+ * dentry->d_inode->i_mutex locked.
11353+ */
11354+ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
11355+{
11356+ struct dentry *parent;
11357+ int err;
11358+
11359+ /* take the superblock read lock, then the parent, then this dentry */
11360+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11361+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11362+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11363+
11364+ /* a dentry that fails revalidation is reported as stale */
11365+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
11366+ err = -ESTALE;
11367+ } else {
11368+ struct dentry *lower = unionfs_lower_dentry(dentry);
11369+
11370+ /* hand the request to the lower dentry unchanged */
11371+ err = vfs_listxattr(lower, list, size);
11372+ }
11373+
11374+ /* check, then release the locks in reverse order of acquisition */
11375+ unionfs_check_dentry(dentry);
11376+ unionfs_unlock_dentry(dentry);
11377+ unionfs_unlock_parent(dentry, parent);
11378+ unionfs_read_unlock(dentry->d_sb);
11379+
11380+ return err;
11381+}
11382diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
11383index da317c7..64f1ced 100644
11384--- a/include/linux/fs_stack.h
11385+++ b/include/linux/fs_stack.h
11386@@ -1,7 +1,19 @@
11387+/*
11388+ * Copyright (c) 2006-2009 Erez Zadok
11389+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
11390+ * Copyright (c) 2006-2009 Stony Brook University
11391+ * Copyright (c) 2006-2009 The Research Foundation of SUNY
11392+ *
11393+ * This program is free software; you can redistribute it and/or modify
11394+ * it under the terms of the GNU General Public License version 2 as
11395+ * published by the Free Software Foundation.
11396+ */
11397+
11398 #ifndef _LINUX_FS_STACK_H
11399 #define _LINUX_FS_STACK_H
11400
11401-/* This file defines generic functions used primarily by stackable
11402+/*
11403+ * This file defines generic functions used primarily by stackable
11404 * filesystems; none of these functions require i_mutex to be held.
11405 */
11406
11407diff --git a/include/linux/magic.h b/include/linux/magic.h
11408index 1e5df2a..01ee54d 100644
11409--- a/include/linux/magic.h
11410+++ b/include/linux/magic.h
11411@@ -50,6 +50,8 @@
11412 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
11413 #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
11414
11415+#define UNIONFS_SUPER_MAGIC 0xf15f083d
11416+
11417 #define SMB_SUPER_MAGIC 0x517B
11418 #define USBDEVICE_SUPER_MAGIC 0x9fa2
11419 #define CGROUP_SUPER_MAGIC 0x27e0eb
11420diff --git a/include/linux/namei.h b/include/linux/namei.h
11421index eba45ea..8e19e9c 100644
11422--- a/include/linux/namei.h
11423+++ b/include/linux/namei.h
11424@@ -81,8 +81,11 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
11425
11426 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
11427 int (*open)(struct inode *, struct file *));
11428+extern void release_open_intent(struct nameidata *);
11429
11430 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
11431+extern struct dentry *lookup_one_len_nd(const char *, struct dentry *, int,
11432+ struct nameidata *nd);
11433
11434 extern int follow_down_one(struct path *);
11435 extern int follow_down(struct path *);
11436diff --git a/include/linux/splice.h b/include/linux/splice.h
11437index 997c3b4..54f5501 100644
11438--- a/include/linux/splice.h
11439+++ b/include/linux/splice.h
11440@@ -81,6 +81,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
11441 struct splice_pipe_desc *);
11442 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
11443 splice_direct_actor *);
11444+extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
11445+ loff_t *ppos, size_t len, unsigned int flags);
11446+extern long vfs_splice_to(struct file *in, loff_t *ppos,
11447+ struct pipe_inode_info *pipe, size_t len,
11448+ unsigned int flags);
11449
11450 /*
11451 * for dynamic pipe sizing
11452diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
11453new file mode 100644
11454index 0000000..c84d97e
11455--- /dev/null
11456+++ b/include/linux/union_fs.h
11457@@ -0,0 +1,22 @@
11458+/*
11459+ * Copyright (c) 2003-2009 Erez Zadok
11460+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11461+ * Copyright (c) 2003-2009 Stony Brook University
11462+ * Copyright (c) 2003-2009 The Research Foundation of SUNY
11463+ *
11464+ * This program is free software; you can redistribute it and/or modify
11465+ * it under the terms of the GNU General Public License version 2 as
11466+ * published by the Free Software Foundation.
11467+ */
11468+
11469+#ifndef _LINUX_UNION_FS_H
11470+#define _LINUX_UNION_FS_H
11471+
11472+/*
11473+ * DEFINITIONS FOR USER AND KERNEL CODE: unionfs ioctl request numbers.
11474+ */
11475+# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
11476+# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
11477+
11478+#endif /* _LINUX_UNION_FS_H */
11479+
11480diff --git a/security/security.c b/security/security.c
11481index 4ba6d4c..093d8b4 100644
11482--- a/security/security.c
11483+++ b/security/security.c
11484@@ -520,6 +520,7 @@ int security_inode_permission(struct inode *inode, int mask)
11485 return 0;
11486 return security_ops->inode_permission(inode, mask, 0);
11487 }
11488+EXPORT_SYMBOL(security_inode_permission);
11489
11490 int security_inode_exec_permission(struct inode *inode, unsigned int flags)
11491 {
11492--
114931.6.6.1
11494