Diffstat (limited to 'recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8')
11 files changed, 1693 insertions, 0 deletions
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0001-i387-math_state_restore-isn-t-called-from-asm.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0001-i387-math_state_restore-isn-t-called-from-asm.patch
new file mode 100644
index 00000000..713171ef
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0001-i387-math_state_restore-isn-t-called-from-asm.patch
@@ -0,0 +1,53 @@
From 4733009df6d45db10f1f7551e65147576f224a06 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Feb 2012 13:47:25 -0800
Subject: [PATCH 01/11] i387: math_state_restore() isn't called from asm

commit be98c2cdb15ba26148cd2bd58a857d4f7759ed38 upstream.

It was marked asmlinkage for some really old and stale legacy reasons.
Fix that and the equally stale comment.

Noticed when debugging the irq_fpu_usable() bugs.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 2 +-
 arch/x86/kernel/traps.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea..cba1432 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,7 +29,7 @@ extern unsigned int sig_xstate_size;
extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
+extern void math_state_restore(void);
extern void __math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb8..727e6c1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -590,10 +590,10 @@ void __math_state_restore(void)
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
*
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
*/
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
{
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0002-i387-make-irq_fpu_usable-tests-more-robust.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0002-i387-make-irq_fpu_usable-tests-more-robust.patch
new file mode 100644
index 00000000..e0d9cdf2
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0002-i387-make-irq_fpu_usable-tests-more-robust.patch
@@ -0,0 +1,131 @@
From 42f2560ed6e9b040ef64e18a5030bf2d2cb05d7f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Feb 2012 13:56:14 -0800
Subject: [PATCH 02/11] i387: make irq_fpu_usable() tests more robust

commit 5b1cbac37798805c1fee18c8cebe5c0a13975b17 upstream.

Some code - especially the crypto layer - wants to use the x86
FP/MMX/AVX register set in what may be interrupt (typically softirq)
context.

That *can* be ok, but the tests for when it was ok were somewhat
suspect. We cannot touch the thread-specific status bits either, so
we'd better check that we're not going to try to save FP state or
anything like that.

Now, it may be that the TS bit is always cleared *before* we set the
USEDFPU bit (and only set when we had already cleared the USEDFP
before), so the TS bit test may actually have been sufficient, but it
certainly was not obviously so.

So this explicitly verifies that we will not touch the TS_USEDFPU bit,
and adds a few related sanity-checks. Because it seems that somehow
AES-NI is corrupting user FP state. The cause is not clear, and this
patch doesn't fix it, but while debugging it I really wanted the code to
be more obviously correct and robust.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 54 ++++++++++++++++++++++++++++++++++++------
 arch/x86/kernel/traps.c | 1 +
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index cba1432..a436582 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -307,9 +307,54 @@ static inline void __clear_fpu(struct task_struct *tsk)
}
}

+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: TS_USEDFPU must be clear (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+ return !(current_thread_info()->status & TS_USEDFPU) &&
+ (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+
static inline void kernel_fpu_begin(void)
{
struct thread_info *me = current_thread_info();
+
+ WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
if (me->status & TS_USEDFPU)
__save_init_fpu(me->task);
@@ -323,14 +368,6 @@ static inline void kernel_fpu_end(void)
preempt_enable();
}

-static inline bool irq_fpu_usable(void)
-{
- struct pt_regs *regs;
-
- return !in_interrupt() || !(regs = get_irq_regs()) || \
- user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
/*
* Some instructions like VIA's padlock instructions generate a spurious
* DNA fault but don't modify SSE registers. And these instructions
@@ -367,6 +404,7 @@ static inline void irq_ts_restore(int TS_state)
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
+ WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
preempt_disable();
__save_init_fpu(tsk);
stts();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 727e6c1..41e0b8c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -622,6 +622,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
+ WARN_ON_ONCE(!user_mode_vm(regs));
#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
--
1.7.7.4

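The calling convention that patch 02 describes for interrupt-context FPU use looks roughly like the sketch below. This is an illustrative fragment only, not part of the patch series: irq_fpu_usable(), kernel_fpu_begin() and kernel_fpu_end() are the real 3.2-era APIs, while xor_blocks_sse() and xor_blocks_fallback() are made-up stand-ins for an SSE-accelerated routine and its integer fallback.

    #include <asm/i387.h>

    static void xor_blocks(unsigned long len, unsigned long *a, unsigned long *b)
    {
            if (irq_fpu_usable()) {
                    /* Safe: saves any live user FPU state, clears CR0.TS */
                    kernel_fpu_begin();
                    xor_blocks_sse(len, a, b);
                    kernel_fpu_end();
            } else {
                    /* Softirq interrupted a kernel FPU user: use integer code */
                    xor_blocks_fallback(len, a, b);
            }
    }
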
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0003-i387-fix-sense-of-sanity-check.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0003-i387-fix-sense-of-sanity-check.patch
new file mode 100644
index 00000000..1b5f9775
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0003-i387-fix-sense-of-sanity-check.patch
@@ -0,0 +1,37 @@
From 0a7ea9d5aa1e2cab84a48c0380f7f8c305006224 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 15 Feb 2012 08:05:18 -0800
Subject: [PATCH 03/11] i387: fix sense of sanity check

commit c38e23456278e967f094b08247ffc3711b1029b2 upstream.

The check for save_init_fpu() (introduced in commit 5b1cbac37798: "i387:
make irq_fpu_usable() tests more robust") was the wrong way around, but
I hadn't noticed, because my "tests" were bogus: the FPU exceptions are
disabled by default, so even doing a divide by zero never actually
triggers this code at all unless you do extra work to enable them.

So if anybody did enable them, they'd get one spurious warning.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a436582..262bea9 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -404,7 +404,7 @@ static inline void irq_ts_restore(int TS_state)
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
- WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
+ WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
preempt_disable();
__save_init_fpu(tsk);
stts();
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0004-i387-fix-x86-64-preemption-unsafe-user-stack-save-re.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0004-i387-fix-x86-64-preemption-unsafe-user-stack-save-re.patch
new file mode 100644
index 00000000..e4969143
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0004-i387-fix-x86-64-preemption-unsafe-user-stack-save-re.patch
@@ -0,0 +1,163 @@
From a5c28716652f9f71c848452b67795e5af690a91f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 09:15:04 -0800
Subject: [PATCH 04/11] i387: fix x86-64 preemption-unsafe user stack
 save/restore

commit 15d8791cae75dca27bfda8ecfe87dca9379d6bb0 upstream.

Commit 5b1cbac37798 ("i387: make irq_fpu_usable() tests more robust")
added a sanity check to the #NM handler to verify that we never cause
the "Device Not Available" exception in kernel mode.

However, that check actually pinpointed a (fundamental) race where we do
cause that exception as part of the signal stack FPU state save/restore
code.

Because we use the floating point instructions themselves to save and
restore state directly from user mode, we cannot do that atomically with
testing the TS_USEDFPU bit: the user mode access itself may cause a page
fault, which causes a task switch, which saves and restores the FP/MMX
state from the kernel buffers.

This kind of "recursive" FP state save is fine per se, but it means that
when the signal stack save/restore gets restarted, it will now take the
'#NM' exception we originally tried to avoid. With preemption this can
happen even without the page fault - but because of the user access, we
cannot just disable preemption around the save/restore instruction.

There are various ways to solve this, including using the
"enable/disable_page_fault()" helpers to not allow page faults at all
during the sequence, and fall back to copying things by hand without the
use of the native FP state save/restore instructions.

However, the simplest thing to do is to just allow the #NM from kernel
space, but fix the race in setting and clearing CR0.TS that this all
exposed: the TS bit changes and the TS_USEDFPU bit absolutely have to be
atomic wrt scheduling, so while the actual state save/restore can be
interrupted and restarted, the act of actually clearing/setting CR0.TS
and the TS_USEDFPU bit together must not.

Instead of just adding random "preempt_disable/enable()" calls to what
is already excessively ugly code, this introduces some helper functions
that mostly mirror the "kernel_fpu_begin/end()" functionality, just for
the user state instead.

Those helper functions should probably eventually replace the other
ad-hoc CR0.TS and TS_USEDFPU tests too, but I'll need to think about it
some more: the task switching functionality in particular needs to
expose the difference between the 'prev' and 'next' threads, while the
new helper functions intentionally were written to only work with
'current'.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c | 1 -
 arch/x86/kernel/xsave.c | 10 +++-------
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 262bea9..6e87fa4 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -400,6 +400,48 @@ static inline void irq_ts_restore(int TS_state)
}

/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+ return current_thread_info()->status & TS_USEDFPU;
+}
+
+static inline void user_fpu_end(void)
+{
+ preempt_disable();
+ current_thread_info()->status &= ~TS_USEDFPU;
+ stts();
+ preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+ preempt_disable();
+ if (!user_has_fpu()) {
+ clts();
+ current_thread_info()->status |= TS_USEDFPU;
+ }
+ preempt_enable();
+}
+
+/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 41e0b8c..727e6c1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -622,7 +622,6 @@ EXPORT_SYMBOL_GPL(math_state_restore);
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
- WARN_ON_ONCE(!user_mode_vm(regs));
#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a391134..86f1f09 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;

- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)

if (err)
return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ user_fpu_end();
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
return err;
}

- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
+ user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0005-i387-move-TS_USEDFPU-clearing-out-of-__save_init_fpu.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0005-i387-move-TS_USEDFPU-clearing-out-of-__save_init_fpu.patch
new file mode 100644
index 00000000..a8d1982d
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0005-i387-move-TS_USEDFPU-clearing-out-of-__save_init_fpu.patch
@@ -0,0 +1,64 @@
From 38358b6185298df66ef4ddb4ceaaa1baf8521b28 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 12:22:48 -0800
Subject: [PATCH 05/11] i387: move TS_USEDFPU clearing out of __save_init_fpu
 and into callers

commit b6c66418dcad0fcf83cd1d0a39482db37bf4fc41 upstream.

Touching TS_USEDFPU without touching CR0.TS is confusing, so don't do
it. By moving it into the callers, we always do the TS_USEDFPU next to
the CR0.TS accesses in the source code, and it's much easier to see how
the two go hand in hand.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 9 ++++++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6e87fa4..55fb3aa 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -259,7 +259,6 @@ static inline void fpu_save_init(struct fpu *fpu)
static inline void __save_init_fpu(struct task_struct *tsk)
{
fpu_save_init(&tsk->thread.fpu);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
}

static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -290,6 +289,7 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
{
if (task_thread_info(tsk)->status & TS_USEDFPU) {
__save_init_fpu(tsk);
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
} else
tsk->fpu_counter = 0;
@@ -356,9 +356,11 @@ static inline void kernel_fpu_begin(void)

WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
- if (me->status & TS_USEDFPU)
+ if (me->status & TS_USEDFPU) {
__save_init_fpu(me->task);
- else
+ me->status &= ~TS_USEDFPU;
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else
clts();
}

@@ -449,6 +451,7 @@ static inline void save_init_fpu(struct task_struct *tsk)
WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
preempt_disable();
__save_init_fpu(tsk);
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
preempt_enable();
}
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0006-i387-don-t-ever-touch-TS_USEDFPU-directly-use-helper.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0006-i387-don-t-ever-touch-TS_USEDFPU-directly-use-helper.patch
new file mode 100644
index 00000000..7daaa620
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0006-i387-don-t-ever-touch-TS_USEDFPU-directly-use-helper.patch
@@ -0,0 +1,228 @@
From 29515b215b9bbbad0368a5039ba6e53ed3fa7f25 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 13:33:12 -0800
Subject: [PATCH 06/11] i387: don't ever touch TS_USEDFPU directly, use helper
 functions

commit 6d59d7a9f5b723a7ac1925c136e93ec83c0c3043 upstream.

This creates three helper functions that do the TS_USEDFPU accesses, and
makes everybody that used to do it by hand use those helpers instead.

In addition, there's a couple of helper functions for the "change both
CR0.TS and TS_USEDFPU at the same time" case, and the places that do
that together have been changed to use those. That means that we have
fewer random places that open-code this situation.

The intent is partly to clarify the code without actually changing any
semantics yet (since we clearly still have some hard to reproduce bug in
this area), but also to make it much easier to use another approach
entirely to caching the CR0.TS bit for software accesses.

Right now we use a bit in the thread-info 'status' variable (this patch
does not change that), but we might want to make it a full field of its
own or even make it a per-cpu variable.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 75 +++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/traps.c | 2 +-
 arch/x86/kernel/xsave.c | 2 +-
 arch/x86/kvm/vmx.c | 2 +-
 4 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 55fb3aa..730d7be 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -280,6 +280,47 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
}

/*
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
+ */
+static inline int __thread_has_fpu(struct thread_info *ti)
+{
+ return ti->status & TS_USEDFPU;
+}
+
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct thread_info *ti)
+{
+ ti->status &= ~TS_USEDFPU;
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct thread_info *ti)
+{
+ ti->status |= TS_USEDFPU;
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct thread_info *ti)
+{
+ __thread_clear_has_fpu(ti);
+ stts();
+}
+
+static inline void __thread_fpu_begin(struct thread_info *ti)
+{
+ clts();
+ __thread_set_has_fpu(ti);
+}
+
+/*
* Signal frame handlers...
*/
extern int save_i387_xstate(void __user *buf);
@@ -287,23 +328,21 @@ extern int restore_i387_xstate(void __user *buf);

static inline void __unlazy_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (__thread_has_fpu(task_thread_info(tsk))) {
__save_init_fpu(tsk);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(task_thread_info(tsk));
} else
tsk->fpu_counter = 0;
}

static inline void __clear_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (__thread_has_fpu(task_thread_info(tsk))) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(task_thread_info(tsk));
}
}

@@ -311,14 +350,14 @@ static inline void __clear_fpu(struct task_struct *tsk)
* Were we in an interrupt that interrupted kernel mode?
*
* We can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: TS_USEDFPU must be clear (so
+ * pair does nothing at all: the thread must not have fpu (so
* that we don't try to save the FPU state), and TS must
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
- return !(current_thread_info()->status & TS_USEDFPU) &&
+ return !__thread_has_fpu(current_thread_info()) &&
(read_cr0() & X86_CR0_TS);
}

@@ -356,9 +395,9 @@ static inline void kernel_fpu_begin(void)

WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
- if (me->status & TS_USEDFPU) {
+ if (__thread_has_fpu(me)) {
__save_init_fpu(me->task);
- me->status &= ~TS_USEDFPU;
+ __thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */
} else
clts();
@@ -422,24 +461,21 @@ static inline void irq_ts_restore(int TS_state)
*/
static inline int user_has_fpu(void)
{
- return current_thread_info()->status & TS_USEDFPU;
+ return __thread_has_fpu(current_thread_info());
}

static inline void user_fpu_end(void)
{
preempt_disable();
- current_thread_info()->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(current_thread_info());
preempt_enable();
}

static inline void user_fpu_begin(void)
{
preempt_disable();
- if (!user_has_fpu()) {
- clts();
- current_thread_info()->status |= TS_USEDFPU;
- }
+ if (!user_has_fpu())
+ __thread_fpu_begin(current_thread_info());
preempt_enable();
}

@@ -448,11 +484,10 @@ static inline void user_fpu_begin(void)
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
- WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
+ WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
preempt_disable();
__save_init_fpu(tsk);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(task_thread_info(tsk));
preempt_enable();
}

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 727e6c1..bb5445c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -579,7 +579,7 @@ void __math_state_restore(void)
return;
}

- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ __thread_set_has_fpu(thread); /* clts in caller! */
tsk->fpu_counter++;
}

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 86f1f09..a0bcd0d 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;

- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+ BUG_ON(__thread_has_fpu(task_thread_info(tsk)));

xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 579a0b5..b2c612d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1456,7 +1456,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (current_thread_info()->status & TS_USEDFPU)
+ if (__thread_has_fpu(current_thread_info()))
clts();
load_gdt(&__get_cpu_var(host_gdt));
}
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0007-i387-do-not-preload-FPU-state-at-task-switch-time.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0007-i387-do-not-preload-FPU-state-at-task-switch-time.patch
new file mode 100644
index 00000000..c6553d93
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0007-i387-do-not-preload-FPU-state-at-task-switch-time.patch
@@ -0,0 +1,202 @@
From ba6aaed5cc8f55b77644daf56e9ae3a75f042908 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 15:45:23 -0800
Subject: [PATCH 07/11] i387: do not preload FPU state at task switch time

commit b3b0870ef3ffed72b92415423da864f440f57ad6 upstream.

Yes, taking the trap to re-load the FPU/MMX state is expensive, but so
is spending several days looking for a bug in the state save/restore
code. And the preload code has some rather subtle interactions with
both paravirtualization support and segment state restore, so it's not
nearly as simple as it should be.

Also, now that we no longer necessarily depend on a single bit (ie
TS_USEDFPU) for keeping track of the state of the FPU, we migth be able
to do better. If we are really switching between two processes that
keep touching the FP state, save/restore is inevitable, but in the case
of having one process that does most of the FPU usage, we may actually
be able to do much better than the preloading.

In particular, we may be able to keep track of which CPU the process ran
on last, and also per CPU keep track of which process' FP state that CPU
has. For modern CPU's that don't destroy the FPU contents on save time,
that would allow us to do a lazy restore by just re-enabling the
existing FPU state - with no restore cost at all!

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 1 -
 arch/x86/kernel/process_32.c | 20 --------------------
 arch/x86/kernel/process_64.c | 23 -----------------------
 arch/x86/kernel/traps.c | 35 +++++++++++------------------------
 4 files changed, 11 insertions(+), 68 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 730d7be..3521c24 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -30,7 +30,6 @@ extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern void math_state_restore(void);
-extern void __math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

extern user_regset_active_fn fpregs_active, xfpregs_active;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f..0cdb4fa 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -297,23 +297,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- bool preload_fpu;

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
__unlazy_fpu(prev_p);

- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
-
/*
* Reload esp0.
*/
@@ -352,11 +340,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);

- /* If we're going to preload the fpu context, make sure clts
- is run while we're batching the cpu state updates. */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -366,9 +349,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);

- if (preload_fpu)
- __math_state_restore();
-
/*
* Restore %gs if needed (which is common)
*/
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6e..370801e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -381,18 +381,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
- bool preload_fpu;
-
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);

/*
* Reload esp0, LDT and the page table pointer:
@@ -425,10 +413,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Must be after DS reload */
__unlazy_fpu(prev_p);

- /* Make sure cpu is ready for new context */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -487,13 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);

- /*
- * Preload the FPU context, now that we've determined that the
- * task is likely to be using it.
- */
- if (preload_fpu)
- __math_state_restore();
-
return prev_p;
}

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bb5445c..7767ed2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -562,28 +562,6 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
}

/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
- */
-void __math_state_restore(void)
-{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
-
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- __thread_set_has_fpu(thread); /* clts in caller! */
- tsk->fpu_counter++;
-}
-
-/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
@@ -613,9 +591,18 @@ void math_state_restore(void)
local_irq_disable();
}

- clts(); /* Allow maths ops (or we recurse) */
+ __thread_fpu_begin(thread);

- __math_state_restore();
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ __thread_fpu_end(thread);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+ tsk->fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);

--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0008-i387-move-AMD-K7-K8-fpu-fxsave-fxrstor-workaround-fr.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0008-i387-move-AMD-K7-K8-fpu-fxsave-fxrstor-workaround-fr.patch
new file mode 100644
index 00000000..00458e1c
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0008-i387-move-AMD-K7-K8-fpu-fxsave-fxrstor-workaround-fr.patch
@@ -0,0 +1,133 @@
From 9147fbe60acc9125e7b0deae409f1da5c3f8bdda Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 16 Feb 2012 19:11:15 -0800
Subject: [PATCH 08/11] i387: move AMD K7/K8 fpu fxsave/fxrstor workaround
 from save to restore

commit 4903062b5485f0e2c286a23b44c9b59d9b017d53 upstream.

The AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
pending. In order to not leak FIP state from one process to another, we
need to do a floating point load after the fxsave of the old process,
and before the fxrstor of the new FPU state. That resets the state to
the (uninteresting) kernel load, rather than some potentially sensitive
user information.

We used to do this directly after the FPU state save, but that is
actually very inconvenient, since it

(a) corrupts what is potentially perfectly good FPU state that we might
want to lazy avoid restoring later and

(b) on x86-64 it resulted in a very annoying ordering constraint, where
"__unlazy_fpu()" in the task switch needs to be delayed until after
the DS segment has been reloaded just to get the new DS value.

Coupling it to the fxrstor instead of the fxsave automatically avoids
both of these issues, and also ensures that we only do it when actually
necessary (the FP state after a save may never actually get used). It's
simply a much more natural place for the leaked state cleanup.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 19 -------------------
 arch/x86/kernel/process_64.c | 5 ++---
 arch/x86/kernel/traps.c | 14 ++++++++++++++
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 3521c24..01b115d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -211,15 +211,6 @@ static inline void fpu_fxsave(struct fpu *fpu)

#endif /* CONFIG_X86_64 */

-/* We need a safe address that is cheap to find and that is already
- in L1 during context switch. The best choices are unfortunately
- different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
/*
* These must be called with preempt disabled
*/
@@ -243,16 +234,6 @@ static inline void fpu_save_init(struct fpu *fpu)

if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
asm volatile("fnclex");
-
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. safe_address is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (safe_address));
}

static inline void __save_init_fpu(struct task_struct *tsk)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 370801e..042b18f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -382,6 +382,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;

+ __unlazy_fpu(prev_p);
+
/*
* Reload esp0, LDT and the page table pointer:
*/
@@ -410,9 +412,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

load_TLS(next, cpu);

- /* Must be after DS reload */
- __unlazy_fpu(prev_p);
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7767ed2..c24bb75 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -576,6 +576,10 @@ void math_state_restore(void)
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;

+ /* We need a safe address that is cheap to find and that is already
+ in L1. We just brought in "thread->task", so use that */
+#define safe_address (thread->task)
+
if (!tsk_used_math(tsk)) {
local_irq_enable();
/*
@@ -593,6 +597,16 @@ void math_state_restore(void)

__thread_fpu_begin(thread);

+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. safe_address is a random variable that should be in L1 */
+ alternative_input(
+ ASM_NOP8 ASM_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %P[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (safe_address));
+
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
--
1.7.7.4

diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0009-i387-move-TS_USEDFPU-flag-from-thread_info-to-task_s.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0009-i387-move-TS_USEDFPU-flag-from-thread_info-to-task_s.patch
new file mode 100644
index 00000000..af43d215
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0009-i387-move-TS_USEDFPU-flag-from-thread_info-to-task_s.patch
@@ -0,0 +1,305 @@
From 555558c5bf8e8d9919fbcbe4b1cfe920f692c0cb Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 17 Feb 2012 21:48:54 -0800
Subject: [PATCH 09/11] i387: move TS_USEDFPU flag from thread_info to
 task_struct

commit f94edacf998516ac9d849f7bc6949a703977a7f3 upstream.

This moves the bit that indicates whether a thread has ownership of the
FPU from the TS_USEDFPU bit in thread_info->status to a word of its own
(called 'has_fpu') in task_struct->thread.has_fpu.

This fixes two independent bugs at the same time:

- changing 'thread_info->status' from the scheduler causes nasty
problems for the other users of that variable, since it is defined to
be thread-synchronous (that's what the "TS_" part of the naming was
supposed to indicate).

So perfectly valid code could (and did) do

ti->status |= TS_RESTORE_SIGMASK;

and the compiler was free to do that as separate load, or and store
instructions. Which can cause problems with preemption, since a task
switch could happen in between, and change the TS_USEDFPU bit. The
change to TS_USEDFPU would be overwritten by the final store.

In practice, this seldom happened, though, because the 'status' field
was seldom used more than once, so gcc would generally tend to
generate code that used a read-modify-write instruction and thus
happened to avoid this problem - RMW instructions are naturally low
fat and preemption-safe.

- On x86-32, the current_thread_info() pointer would, during interrupts
and softirqs, point to a *copy* of the real thread_info, because
x86-32 uses %esp to calculate the thread_info address, and thus the
separate irq (and softirq) stacks would cause these kinds of odd
thread_info copy aliases.

This is normally not a problem, since interrupts aren't supposed to
look at thread information anyway (what thread is running at
interrupt time really isn't very well-defined), but it confused the
heck out of irq_fpu_usable() and the code that tried to squirrel
away the FPU state.

(It also caused untold confusion for us poor kernel developers).

It also turns out that using 'task_struct' is actually much more natural
for most of the call sites that care about the FPU state, since they
tend to work with the task struct for other reasons anyway (ie
scheduling). And the FPU data that we are going to save/restore is
found there too.

Thanks to Arjan Van De Ven <arjan@linux.intel.com> for pointing us to
the %esp issue.

Cc: Arjan van de Ven <arjan@linux.intel.com>
Reported-and-tested-by: Raphael Prevost <raphael@buro.asia>
Acked-and-tested-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i387.h | 44 ++++++++++++++++++------------------
 arch/x86/include/asm/processor.h | 1 +
 arch/x86/include/asm/thread_info.h | 2 -
 arch/x86/kernel/traps.c | 11 ++++-----
 arch/x86/kernel/xsave.c | 2 +-
 arch/x86/kvm/vmx.c | 2 +-
 6 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 01b115d..f537667 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -264,21 +264,21 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
* be preemption protection *and* they need to be
* properly paired with the CR0.TS changes!
*/
-static inline int __thread_has_fpu(struct thread_info *ti)
+static inline int __thread_has_fpu(struct task_struct *tsk)
{
- return ti->status & TS_USEDFPU;
+ return tsk->thread.has_fpu;
}

/* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct thread_info *ti)
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
- ti->status &= ~TS_USEDFPU;
+ tsk->thread.has_fpu = 0;
}

/* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct thread_info *ti)
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
{
- ti->status |= TS_USEDFPU;
+ tsk->thread.has_fpu = 1;
}

/*
@@ -288,16 +288,16 @@ static inline void __thread_set_has_fpu(struct thread_info *ti)
* These generally need preemption protection to work,
* do try to avoid using these on their own.
*/
-static inline void __thread_fpu_end(struct thread_info *ti)
+static inline void __thread_fpu_end(struct task_struct *tsk)
{
- __thread_clear_has_fpu(ti);
+ __thread_clear_has_fpu(tsk);
stts();
}

-static inline void __thread_fpu_begin(struct thread_info *ti)
+static inline void __thread_fpu_begin(struct task_struct *tsk)
{
clts();
- __thread_set_has_fpu(ti);
+ __thread_set_has_fpu(tsk);
}

/*
@@ -308,21 +308,21 @@ extern int restore_i387_xstate(void __user *buf);

static inline void __unlazy_fpu(struct task_struct *tsk)
{
- if (__thread_has_fpu(task_thread_info(tsk))) {
+ if (__thread_has_fpu(tsk)) {
__save_init_fpu(tsk);
- __thread_fpu_end(task_thread_info(tsk));
+ __thread_fpu_end(tsk);
} else
tsk->fpu_counter = 0;
}

static inline void __clear_fpu(struct task_struct *tsk)
{
- if (__thread_has_fpu(task_thread_info(tsk))) {
+ if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(task_thread_info(tsk));
+ __thread_fpu_end(tsk);
}
}

@@ -337,7 +337,7 @@ static inline void __clear_fpu(struct task_struct *tsk)
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
- return !__thread_has_fpu(current_thread_info()) &&
+ return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
}

@@ -371,12 +371,12 @@ static inline bool irq_fpu_usable(void)

static inline void kernel_fpu_begin(void)
{
- struct thread_info *me = current_thread_info();
+ struct task_struct *me = current;

WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
if (__thread_has_fpu(me)) {
- __save_init_fpu(me->task);
+ __save_init_fpu(me);
__thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */
} else
@@ -441,13 +441,13 @@ static inline void irq_ts_restore(int TS_state)
*/
static inline int user_has_fpu(void)
{
- return __thread_has_fpu(current_thread_info());
+ return __thread_has_fpu(current);
}

static inline void user_fpu_end(void)
{
preempt_disable();
- __thread_fpu_end(current_thread_info());
+ __thread_fpu_end(current);
preempt_enable();
}

@@ -455,7 +455,7 @@ static inline void user_fpu_begin(void)
{
preempt_disable();
if (!user_has_fpu())
- __thread_fpu_begin(current_thread_info());
+ __thread_fpu_begin(current);
preempt_enable();
}

@@ -464,10 +464,10 @@ static inline void user_fpu_begin(void)
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
- WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
+ WARN_ON_ONCE(!__thread_has_fpu(tsk));
preempt_disable();
__save_init_fpu(tsk);
- __thread_fpu_end(task_thread_info(tsk));
+ __thread_fpu_end(tsk);
preempt_enable();
}

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b650435..bb3ee36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -456,6 +456,7 @@ struct thread_struct {
unsigned long trap_no;
unsigned long error_code;
/* floating point and extended processor state */
+ unsigned long has_fpu;
struct fpu fpu;
#ifdef CONFIG_X86_32
/* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c1..d7ef849 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
* ever touches our thread-synchronous status, so we don't
* have to worry about atomic accesses.
*/
-#define TS_USEDFPU 0x0001 /* FPU was used by this task
- this quantum (SMP) */
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
#define TS_POLLING 0x0004 /* idle task polling need_resched,
skip sending interrupt */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c24bb75..a99badf 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -573,12 +573,11 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
*/
void math_state_restore(void)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;

/* We need a safe address that is cheap to find and that is already
- in L1. We just brought in "thread->task", so use that */
-#define safe_address (thread->task)
+ in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)

if (!tsk_used_math(tsk)) {
local_irq_enable();
@@ -595,7 +594,7 @@ void math_state_restore(void)
local_irq_disable();
}

- __thread_fpu_begin(thread);
+ __thread_fpu_begin(tsk);

/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
@@ -611,7 +610,7 @@ void math_state_restore(void)
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
- __thread_fpu_end(thread);
+ __thread_fpu_end(tsk);
force_sig(SIGSEGV, tsk);
return;
}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a0bcd0d..7110911 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;

- BUG_ON(__thread_has_fpu(task_thread_info(tsk)));
+ BUG_ON(__thread_has_fpu(tsk));

xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b2c612d..4ea7678 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
294 | @@ -1456,7 +1456,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | ||
295 | #ifdef CONFIG_X86_64 | ||
296 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
297 | #endif | ||
298 | - if (__thread_has_fpu(current_thread_info())) | ||
299 | + if (__thread_has_fpu(current)) | ||
300 | clts(); | ||
301 | load_gdt(&__get_cpu_var(host_gdt)); | ||
302 | } | ||
303 | -- | ||
304 | 1.7.7.4 | ||
305 | |||
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0010-i387-re-introduce-FPU-state-preloading-at-context-sw.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0010-i387-re-introduce-FPU-state-preloading-at-context-sw.patch new file mode 100644 index 00000000..95d4a73e --- /dev/null +++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0010-i387-re-introduce-FPU-state-preloading-at-context-sw.patch | |||
@@ -0,0 +1,353 @@ | |||
1 | From 9016ec427136d5b5d025948319cf1114dc7734e4 Mon Sep 17 00:00:00 2001 | ||
2 | From: Linus Torvalds <torvalds@linux-foundation.org> | ||
3 | Date: Sat, 18 Feb 2012 12:56:35 -0800 | ||
4 | Subject: [PATCH 10/11] i387: re-introduce FPU state preloading at context | ||
5 | switch time | ||
6 | |||
7 | commit 34ddc81a230b15c0e345b6b253049db731499f7e upstream. | ||
8 | |||
9 | After all the FPU state cleanups and finally finding the problem that | ||
10 | caused all our FPU save/restore problems, this re-introduces the | ||
11 | preloading of FPU state that was removed in commit b3b0870ef3ff ("i387: | ||
12 | do not preload FPU state at task switch time"). | ||
13 | |||
14 | However, instead of simply reverting the removal, this reimplements | ||
15 | preloading with several fixes, most notably | ||
16 | |||
17 | - properly abstracted as a true FPU state switch, rather than as | ||
18 | open-coded save and restore with various hacks. | ||
19 | |||
20 | In particular, implementing it as a proper FPU state switch allows us | ||
21 | to optimize the CR0.TS flag accesses: there is no reason to set the | ||
22 | TS bit only to then almost immediately clear it again. CR0 accesses | ||
23 | are quite slow and expensive, don't flip the bit back and forth for | ||
24 | no good reason. | ||
25 | |||
26 | - Make sure that the same model works for both x86-32 and x86-64, so | ||
27 | that there are no gratuitous differences between the two due to the | ||
28 | way they save and restore segment state differently due to | ||
29 | architectural differences that really don't matter to the FPU state. | ||
30 | |||
31 | - Avoid exposing the "preload" state to the context switch routines, | ||
32 | and in particular allow the concept of lazy state restore: if nothing | ||
33 | else has used the FPU in the meantime, and the process is still on | ||
34 | the same CPU, we can avoid restoring state from memory entirely, just | ||
35 | re-expose the state that is still in the FPU unit. | ||
36 | |||
37 | That optimized lazy restore isn't actually implemented here, but the | ||
38 | infrastructure is set up for it. Of course, older CPU's that use | ||
39 | 'fnsave' to save the state cannot take advantage of this, since the | ||
40 | state saving also trashes the state. | ||
41 | |||
42 | In other words, there is now an actual _design_ to the FPU state saving, | ||
43 | rather than just random historical baggage. Hopefully it's easier to | ||
44 | follow as a result. | ||
45 | |||
46 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | ||
47 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | ||
48 | --- | ||
49 | arch/x86/include/asm/i387.h | 110 +++++++++++++++++++++++++++++++++++------- | ||
50 | arch/x86/kernel/process_32.c | 5 ++- | ||
51 | arch/x86/kernel/process_64.c | 5 ++- | ||
52 | arch/x86/kernel/traps.c | 55 ++++++++++++--------- | ||
53 | 4 files changed, 133 insertions(+), 42 deletions(-) | ||
54 | |||
55 | diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h | ||
56 | index f537667..a850b4d 100644 | ||
57 | --- a/arch/x86/include/asm/i387.h | ||
58 | +++ b/arch/x86/include/asm/i387.h | ||
59 | @@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size; | ||
60 | extern void fpu_init(void); | ||
61 | extern void mxcsr_feature_mask_init(void); | ||
62 | extern int init_fpu(struct task_struct *child); | ||
63 | +extern void __math_state_restore(struct task_struct *); | ||
64 | extern void math_state_restore(void); | ||
65 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | ||
66 | |||
67 | @@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu) | ||
68 | #endif /* CONFIG_X86_64 */ | ||
69 | |||
70 | /* | ||
71 | - * These must be called with preempt disabled | ||
72 | + * These must be called with preempt disabled. Returns | ||
73 | + * 'true' if the FPU state is still intact. | ||
74 | */ | ||
75 | -static inline void fpu_save_init(struct fpu *fpu) | ||
76 | +static inline int fpu_save_init(struct fpu *fpu) | ||
77 | { | ||
78 | if (use_xsave()) { | ||
79 | fpu_xsave(fpu); | ||
80 | @@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu) | ||
81 | * xsave header may indicate the init state of the FP. | ||
82 | */ | ||
83 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) | ||
84 | - return; | ||
85 | + return 1; | ||
86 | } else if (use_fxsr()) { | ||
87 | fpu_fxsave(fpu); | ||
88 | } else { | ||
89 | asm volatile("fnsave %[fx]; fwait" | ||
90 | : [fx] "=m" (fpu->state->fsave)); | ||
91 | - return; | ||
92 | + return 0; | ||
93 | } | ||
94 | |||
95 | - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) | ||
96 | + /* | ||
97 | + * If exceptions are pending, we need to clear them so | ||
98 | + * that we don't randomly get exceptions later. | ||
99 | + * | ||
100 | + * FIXME! Is this perhaps only true for the old-style | ||
101 | + * irq13 case? Maybe we could leave the x87 state | ||
102 | + * intact otherwise? | ||
103 | + */ | ||
104 | + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { | ||
105 | asm volatile("fnclex"); | ||
106 | + return 0; | ||
107 | + } | ||
108 | + return 1; | ||
109 | } | ||
110 | |||
111 | -static inline void __save_init_fpu(struct task_struct *tsk) | ||
112 | +static inline int __save_init_fpu(struct task_struct *tsk) | ||
113 | { | ||
114 | - fpu_save_init(&tsk->thread.fpu); | ||
115 | + return fpu_save_init(&tsk->thread.fpu); | ||
116 | } | ||
117 | |||
118 | static inline int fpu_fxrstor_checking(struct fpu *fpu) | ||
119 | @@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk) | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | - * Signal frame handlers... | ||
124 | + * FPU state switching for scheduling. | ||
125 | + * | ||
126 | + * This is a two-stage process: | ||
127 | + * | ||
128 | + * - switch_fpu_prepare() saves the old state and | ||
129 | + * sets the new state of the CR0.TS bit. This is | ||
130 | + * done within the context of the old process. | ||
131 | + * | ||
132 | + * - switch_fpu_finish() restores the new state as | ||
133 | + * necessary. | ||
134 | */ | ||
135 | -extern int save_i387_xstate(void __user *buf); | ||
136 | -extern int restore_i387_xstate(void __user *buf); | ||
137 | +typedef struct { int preload; } fpu_switch_t; | ||
138 | + | ||
139 | +/* | ||
140 | + * FIXME! We could do a totally lazy restore, but we need to | ||
141 | + * add a per-cpu "this was the task that last touched the FPU | ||
142 | + * on this CPU" variable, and the task needs to have a "I last | ||
143 | + * touched the FPU on this CPU" and check them. | ||
144 | + * | ||
145 | + * We don't do that yet, so "fpu_lazy_restore()" always returns | ||
146 | + * false, but some day.. | ||
147 | + */ | ||
148 | +#define fpu_lazy_restore(tsk) (0) | ||
149 | +#define fpu_lazy_state_intact(tsk) do { } while (0) | ||
150 | + | ||
151 | +static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) | ||
152 | +{ | ||
153 | + fpu_switch_t fpu; | ||
154 | + | ||
155 | + fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | ||
156 | + if (__thread_has_fpu(old)) { | ||
157 | + if (__save_init_fpu(old)) | ||
158 | + fpu_lazy_state_intact(old); | ||
159 | + __thread_clear_has_fpu(old); | ||
160 | + old->fpu_counter++; | ||
161 | + | ||
162 | + /* Don't change CR0.TS if we just switch! */ | ||
163 | + if (fpu.preload) { | ||
164 | + __thread_set_has_fpu(new); | ||
165 | + prefetch(new->thread.fpu.state); | ||
166 | + } else | ||
167 | + stts(); | ||
168 | + } else { | ||
169 | + old->fpu_counter = 0; | ||
170 | + if (fpu.preload) { | ||
171 | + if (fpu_lazy_restore(new)) | ||
172 | + fpu.preload = 0; | ||
173 | + else | ||
174 | + prefetch(new->thread.fpu.state); | ||
175 | + __thread_fpu_begin(new); | ||
176 | + } | ||
177 | + } | ||
178 | + return fpu; | ||
179 | +} | ||
180 | |||
181 | -static inline void __unlazy_fpu(struct task_struct *tsk) | ||
182 | +/* | ||
183 | + * By the time this gets called, we've already cleared CR0.TS and | ||
184 | + * given the process the FPU if we are going to preload the FPU | ||
185 | + * state - all we need to do is to conditionally restore the register | ||
186 | + * state itself. | ||
187 | + */ | ||
188 | +static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | ||
189 | { | ||
190 | - if (__thread_has_fpu(tsk)) { | ||
191 | - __save_init_fpu(tsk); | ||
192 | - __thread_fpu_end(tsk); | ||
193 | - } else | ||
194 | - tsk->fpu_counter = 0; | ||
195 | + if (fpu.preload) | ||
196 | + __math_state_restore(new); | ||
197 | } | ||
198 | |||
199 | +/* | ||
200 | + * Signal frame handlers... | ||
201 | + */ | ||
202 | +extern int save_i387_xstate(void __user *buf); | ||
203 | +extern int restore_i387_xstate(void __user *buf); | ||
204 | + | ||
205 | static inline void __clear_fpu(struct task_struct *tsk) | ||
206 | { | ||
207 | if (__thread_has_fpu(tsk)) { | ||
208 | @@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk) | ||
209 | static inline void unlazy_fpu(struct task_struct *tsk) | ||
210 | { | ||
211 | preempt_disable(); | ||
212 | - __unlazy_fpu(tsk); | ||
213 | + if (__thread_has_fpu(tsk)) { | ||
214 | + __save_init_fpu(tsk); | ||
215 | + __thread_fpu_end(tsk); | ||
216 | + } else | ||
217 | + tsk->fpu_counter = 0; | ||
218 | preempt_enable(); | ||
219 | } | ||
220 | |||
221 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | ||
222 | index 0cdb4fa..8598296 100644 | ||
223 | --- a/arch/x86/kernel/process_32.c | ||
224 | +++ b/arch/x86/kernel/process_32.c | ||
225 | @@ -297,10 +297,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
226 | *next = &next_p->thread; | ||
227 | int cpu = smp_processor_id(); | ||
228 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
229 | + fpu_switch_t fpu; | ||
230 | |||
231 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | ||
232 | |||
233 | - __unlazy_fpu(prev_p); | ||
234 | + fpu = switch_fpu_prepare(prev_p, next_p); | ||
235 | |||
236 | /* | ||
237 | * Reload esp0. | ||
238 | @@ -355,6 +356,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
239 | if (prev->gs | next->gs) | ||
240 | lazy_load_gs(next->gs); | ||
241 | |||
242 | + switch_fpu_finish(next_p, fpu); | ||
243 | + | ||
244 | percpu_write(current_task, next_p); | ||
245 | |||
246 | return prev_p; | ||
247 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c | ||
248 | index 042b18f..6a364a6 100644 | ||
249 | --- a/arch/x86/kernel/process_64.c | ||
250 | +++ b/arch/x86/kernel/process_64.c | ||
251 | @@ -381,8 +381,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
252 | int cpu = smp_processor_id(); | ||
253 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
254 | unsigned fsindex, gsindex; | ||
255 | + fpu_switch_t fpu; | ||
256 | |||
257 | - __unlazy_fpu(prev_p); | ||
258 | + fpu = switch_fpu_prepare(prev_p, next_p); | ||
259 | |||
260 | /* | ||
261 | * Reload esp0, LDT and the page table pointer: | ||
262 | @@ -452,6 +453,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
263 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
264 | prev->gsindex = gsindex; | ||
265 | |||
266 | + switch_fpu_finish(next_p, fpu); | ||
267 | + | ||
268 | /* | ||
269 | * Switch the PDA and FPU contexts. | ||
270 | */ | ||
271 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c | ||
272 | index a99badf..31d9d0f 100644 | ||
273 | --- a/arch/x86/kernel/traps.c | ||
274 | +++ b/arch/x86/kernel/traps.c | ||
275 | @@ -562,6 +562,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | + * This gets called with the process already owning the | ||
280 | + * FPU state, and with CR0.TS cleared. It just needs to | ||
281 | + * restore the FPU register state. | ||
282 | + */ | ||
283 | +void __math_state_restore(struct task_struct *tsk) | ||
284 | +{ | ||
285 | + /* We need a safe address that is cheap to find and that is already | ||
286 | + in L1. We've just brought in "tsk->thread.has_fpu", so use that */ | ||
287 | +#define safe_address (tsk->thread.has_fpu) | ||
288 | + | ||
289 | + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | ||
290 | + is pending. Clear the x87 state here by setting it to fixed | ||
291 | + values. safe_address is a random variable that should be in L1 */ | ||
292 | + alternative_input( | ||
293 | + ASM_NOP8 ASM_NOP2, | ||
294 | + "emms\n\t" /* clear stack tags */ | ||
295 | + "fildl %P[addr]", /* set F?P to defined value */ | ||
296 | + X86_FEATURE_FXSAVE_LEAK, | ||
297 | + [addr] "m" (safe_address)); | ||
298 | + | ||
299 | + /* | ||
300 | + * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
301 | + */ | ||
302 | + if (unlikely(restore_fpu_checking(tsk))) { | ||
303 | + __thread_fpu_end(tsk); | ||
304 | + force_sig(SIGSEGV, tsk); | ||
305 | + return; | ||
306 | + } | ||
307 | +} | ||
308 | + | ||
309 | +/* | ||
310 | * 'math_state_restore()' saves the current math information in the | ||
311 | * old math state array, and gets the new ones from the current task | ||
312 | * | ||
313 | @@ -575,10 +606,6 @@ void math_state_restore(void) | ||
314 | { | ||
315 | struct task_struct *tsk = current; | ||
316 | |||
317 | - /* We need a safe address that is cheap to find and that is already | ||
318 | - in L1. We're just bringing in "tsk->thread.has_fpu", so use that */ | ||
319 | -#define safe_address (tsk->thread.has_fpu) | ||
320 | - | ||
321 | if (!tsk_used_math(tsk)) { | ||
322 | local_irq_enable(); | ||
323 | /* | ||
324 | @@ -595,25 +622,7 @@ void math_state_restore(void) | ||
325 | } | ||
326 | |||
327 | __thread_fpu_begin(tsk); | ||
328 | - | ||
329 | - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | ||
330 | - is pending. Clear the x87 state here by setting it to fixed | ||
331 | - values. safe_address is a random variable that should be in L1 */ | ||
332 | - alternative_input( | ||
333 | - ASM_NOP8 ASM_NOP2, | ||
334 | - "emms\n\t" /* clear stack tags */ | ||
335 | - "fildl %P[addr]", /* set F?P to defined value */ | ||
336 | - X86_FEATURE_FXSAVE_LEAK, | ||
337 | - [addr] "m" (safe_address)); | ||
338 | - | ||
339 | - /* | ||
340 | - * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
341 | - */ | ||
342 | - if (unlikely(restore_fpu_checking(tsk))) { | ||
343 | - __thread_fpu_end(tsk); | ||
344 | - force_sig(SIGSEGV, tsk); | ||
345 | - return; | ||
346 | - } | ||
347 | + __math_state_restore(tsk); | ||
348 | |||
349 | tsk->fpu_counter++; | ||
350 | } | ||
351 | -- | ||
352 | 1.7.7.4 | ||
353 | |||
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0011-Linux-3.2.8.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0011-Linux-3.2.8.patch new file mode 100644 index 00000000..e3189b68 --- /dev/null +++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.8/0011-Linux-3.2.8.patch | |||
@@ -0,0 +1,24 @@ | |||
1 | From 1de504ea25617f701ac3a246a1c9dfd2246d4900 Mon Sep 17 00:00:00 2001 | ||
2 | From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | ||
3 | Date: Mon, 27 Feb 2012 10:26:22 -0800 | ||
4 | Subject: [PATCH 11/11] Linux 3.2.8 | ||
5 | |||
6 | --- | ||
7 | Makefile | 2 +- | ||
8 | 1 files changed, 1 insertions(+), 1 deletions(-) | ||
9 | |||
10 | diff --git a/Makefile b/Makefile | ||
11 | index d1bdc90..7df8a84 100644 | ||
12 | --- a/Makefile | ||
13 | +++ b/Makefile | ||
14 | @@ -1,6 +1,6 @@ | ||
15 | VERSION = 3 | ||
16 | PATCHLEVEL = 2 | ||
17 | -SUBLEVEL = 7 | ||
18 | +SUBLEVEL = 8 | ||
19 | EXTRAVERSION = | ||
20 | NAME = Saber-toothed Squirrel | ||
21 | |||
22 | -- | ||
23 | 1.7.7.4 | ||
24 | |||