Diffstat (limited to 'recipes-kernel/linux')
-rw-r--r--  recipes-kernel/linux/linux-dtb.inc                                                         |    68
-rw-r--r--  recipes-kernel/linux/linux-machine-common.inc                                              |     5
-rw-r--r--  recipes-kernel/linux/linux-machine-config.inc                                              |    45
-rw-r--r--  recipes-kernel/linux/linux-machine-kconfig.inc                                             |    21
-rw-r--r--  recipes-kernel/linux/linux-xlnx-dev.bb                                                     |    42
-rw-r--r--  recipes-kernel/linux/linux-xlnx-prt_3.8.bb                                                 |    22
-rw-r--r--  recipes-kernel/linux/linux-xlnx.inc                                                        |    25
-rw-r--r--  recipes-kernel/linux/linux-xlnx/config-3.8.11-rt8-xlnx                                     |  2672
-rw-r--r--  recipes-kernel/linux/linux-xlnx/libtraceevent-Remove-hard-coded-include-to-usr-local.patch |    38
-rw-r--r--  recipes-kernel/linux/linux-xlnx/patch-3.8.11-rt8-xlnx.patch                                | 26332
-rw-r--r--  recipes-kernel/linux/linux-xlnx/preempt-rt.cfg                                             |     3
-rw-r--r--  recipes-kernel/linux/linux-xlnx_3.10.bb                                                    |    10
-rw-r--r--  recipes-kernel/linux/linux-xlnx_3.8.bb                                                     |    13
-rw-r--r--  recipes-kernel/linux/linux-yocto-dev.bbappend                                              |     5
-rw-r--r--  recipes-kernel/linux/linux-yocto/ec2eba55f0c0e74dd39aca14dcc597583cf1eb67.patch            |    70
-rw-r--r--  recipes-kernel/linux/linux-yocto_3.10.bbappend                                             |     8
-rw-r--r--  recipes-kernel/linux/linux-yocto_3.14.bbappend                                             |     5
17 files changed, 29384 insertions(+), 0 deletions(-)
diff --git a/recipes-kernel/linux/linux-dtb.inc b/recipes-kernel/linux/linux-dtb.inc
new file mode 100644
index 00000000..5efb9a0d
--- /dev/null
+++ b/recipes-kernel/linux/linux-dtb.inc
@@ -0,0 +1,68 @@
1# Support for device tree generation
2FILES_kernel-devicetree = "/boot/devicetree*"
3OOT_KERNEL_DEVICETREE_FLAGS ?= "-R 8 -p 0x3000"
4
5python __anonymous () {
6 oot_devicetree = d.getVar("OOT_KERNEL_DEVICETREE", True) or ''
7 if oot_devicetree:
8 depends = d.getVar("DEPENDS", True)
9 d.setVar("DEPENDS", "%s dtc-native" % depends)
10 packages = d.getVar("PACKAGES", True)
11 if "kernel-devicetree" not in packages:
12 d.setVar("PACKAGES", "%s kernel-devicetree" % packages)
13}
14
15do_install_append() {
16 if test -n "${OOT_KERNEL_DEVICETREE}"; then
17 for DTS_FILE in ${OOT_KERNEL_DEVICETREE}; do
18 if [ ! -f ${DTS_FILE} ]; then
19 echo "Warning: ${DTS_FILE} is not available!"
20 continue
21 fi
22 DTS_BASE_NAME=`basename ${DTS_FILE} | awk -F "." '{print $1}'`
23 DTB_NAME=`echo ${KERNEL_IMAGE_BASE_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
24 DTB_SYMLINK_NAME=`echo ${KERNEL_IMAGE_SYMLINK_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
25 dtc -I dts -O dtb ${OOT_KERNEL_DEVICETREE_FLAGS} -o ${DTS_BASE_NAME} ${DTS_FILE}
26 install -m 0644 ${DTS_BASE_NAME} ${D}/${KERNEL_IMAGEDEST}/devicetree-${DTB_SYMLINK_NAME}.dtb
27 done
28 fi
29}
30
31do_deploy_append() {
32 if test -n "${OOT_KERNEL_DEVICETREE}"; then
33 for DTS_FILE in ${OOT_KERNEL_DEVICETREE}; do
34 if [ ! -f ${DTS_FILE} ]; then
35 echo "Warning: ${DTS_FILE} is not available!"
36 continue
37 fi
38 DTS_BASE_NAME=`basename ${DTS_FILE} | awk -F "." '{print $1}'`
39 DTB_NAME=`echo ${KERNEL_IMAGE_BASE_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
40 DTB_SYMLINK_NAME=`echo ${KERNEL_IMAGE_SYMLINK_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
41 install -d ${DEPLOYDIR}
42 install -m 0644 ${B}/${DTS_BASE_NAME} ${DEPLOYDIR}/${DTB_NAME}.dtb
43 cd ${DEPLOYDIR}
44 ln -sf ${DTB_NAME}.dtb ${DTB_SYMLINK_NAME}.dtb
45 cd -
46 done
47 fi
48}
49
50pkg_postinst_kernel-devicetree () {
51 cd /${KERNEL_IMAGEDEST}
52 for DTS_FILE in ${OOT_KERNEL_DEVICETREE}
53 do
54 DTS_BASE_NAME=`basename ${DTS_FILE} | awk -F "." '{print $1}'`
55 DTB_SYMLINK_NAME=`echo ${KERNEL_IMAGE_SYMLINK_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
56 update-alternatives --install /${KERNEL_IMAGEDEST}/${DTS_BASE_NAME}.dtb ${DTS_BASE_NAME}.dtb devicetree-${DTB_SYMLINK_NAME}.dtb ${KERNEL_PRIORITY} || true
57 done
58}
59
60pkg_postrm_kernel-devicetree () {
61 cd /${KERNEL_IMAGEDEST}
62 for DTS_FILE in ${OOT_KERNEL_DEVICETREE}
63 do
64 DTS_BASE_NAME=`basename ${DTS_FILE} | awk -F "." '{print $1}'`
65 DTB_SYMLINK_NAME=`echo ${KERNEL_IMAGE_SYMLINK_NAME} | sed "s/${MACHINE}/${DTS_BASE_NAME}/g"`
66 update-alternatives --remove ${DTS_BASE_NAME}.dtb devicetree-${DTB_SYMLINK_NAME}.dtb ${KERNEL_PRIORITY} || true
67 done
68}
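
The include above turns each entry of OOT_KERNEL_DEVICETREE into a compiled .dtb that is packaged as /boot/devicetree-*.dtb, deployed to DEPLOYDIR, and registered with update-alternatives on the target. A minimal sketch of driving it directly, assuming a hypothetical zc702-zynq7.dts that has already been fetched into ${WORKDIR}/devicetree (normally this variable defaults from MACHINE_DEVICETREE, see linux-machine-config.inc below):

    # Minimal sketch (hypothetical): point the include at a fetched source .dts
    OOT_KERNEL_DEVICETREE = "${WORKDIR}/devicetree/zc702-zynq7.dts"
    OOT_KERNEL_DEVICETREE_FLAGS = "-R 8 -p 0x3000"

    # After do_deploy, DEPLOYDIR should contain something like:
    #   <KERNEL_IMAGE_BASE_NAME with MACHINE replaced by zc702-zynq7>.dtb
    #   <KERNEL_IMAGE_SYMLINK_NAME with MACHINE replaced by zc702-zynq7>.dtb  (symlink)
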
diff --git a/recipes-kernel/linux/linux-machine-common.inc b/recipes-kernel/linux/linux-machine-common.inc
new file mode 100644
index 00000000..0f79e1bb
--- /dev/null
+++ b/recipes-kernel/linux/linux-machine-common.inc
@@ -0,0 +1,5 @@
1require linux-dtb.inc
2require linux-machine-config.inc
3
4# MicroBlaze is a uImage target, but it's not called 'uImage'
5DEPENDS_append_microblaze += "u-boot-mkimage-native"
diff --git a/recipes-kernel/linux/linux-machine-config.inc b/recipes-kernel/linux/linux-machine-config.inc
new file mode 100644
index 00000000..f4e7fa54
--- /dev/null
+++ b/recipes-kernel/linux/linux-machine-config.inc
@@ -0,0 +1,45 @@
1#
2# This include file implements the MACHINE_DEVICETREE and MACHINE_KCONFIG
3# variable handling for the Kernel.
4#
5# e.g. (set in the associated <machine>.conf)
6# MACHINE_DEVICETREE := "zc702/zc702-zynq7.dts"
7# MACHINE_KCONFIG := "common/rtc.cfg"
8#
9# This will expand out to:
10# SRC_URI_append += "file://zc702/zc702-zynq7.dts file://common/rtc.cfg"
11# OOT_KERNEL_DEVICETREE ?= "${WORKDIR}/devicetree/zc702-zynq7.dts"
12#
13# This include also adds the 'conf/machine/boards' directory of every available
14# layer to FILESEXTRAPATHS.
15#
16
17inherit xilinx-utils
18
19# If OOT_KERNEL_DEVICETREE is not set, default to the device trees provided by
20# MACHINE_DEVICETREE
21OOT_KERNEL_DEVICETREE ?= "${@expand_dir_basepaths_by_extension("MACHINE_DEVICETREE", os.path.join(d.getVar("WORKDIR", True), 'devicetree'), '.dts', d)}"
22
23# Appends the '<layer>/conf/machine/boards' path to FILESEXTRAPATHS for all
24# layers (using the ${BBPATH})
25FILESEXTRAPATHS_append := "${@get_additional_bbpath_filespath('conf/machine/boards', d)}"
26
27# Using the MACHINE_DEVICETREE and MACHINE_KCONFIG vars, append them to SRC_URI
28SRC_URI_append += " \
29 ${@paths_affix(d.getVar("MACHINE_DEVICETREE", True) or '', prefix = 'file://')} \
30 ${@paths_affix(d.getVar("MACHINE_KCONFIG", True) or '', prefix = 'file://')} \
31 "
32
33# Copy all device trees into the same directory. This is needed for
34# compatibility with dtc and the use of DTSI (Device Tree Includes), since the
35# version of DTC in Yocto does not provide include path support.
36do_install_prepend() {
37 if test -n "${MACHINE_DEVICETREE}"; then
38 mkdir -p ${WORKDIR}/devicetree
39 for i in ${MACHINE_DEVICETREE}; do
40 if test -e ${WORKDIR}/$i; then
41 cp ${WORKDIR}/$i ${WORKDIR}/devicetree
42 fi
43 done
44 fi
45}
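
To illustrate the variable handling described in the header comment, here is a sketch of a hypothetical BSP layer layout and machine configuration (the zynq7-base.dtsi name is illustrative only; the zc702 paths follow the example in the comment above):

    # <layer>/conf/machine/<machine>.conf
    MACHINE_DEVICETREE := "zc702/zc702-zynq7.dts common/zynq7-base.dtsi"
    MACHINE_KCONFIG := "common/rtc.cfg"

    # ...with the files located under the boards directory that this include
    # appends to FILESEXTRAPATHS:
    #   <layer>/conf/machine/boards/zc702/zc702-zynq7.dts
    #   <layer>/conf/machine/boards/common/zynq7-base.dtsi
    #   <layer>/conf/machine/boards/common/rtc.cfg

The do_install_prepend above then flattens everything into ${WORKDIR}/devicetree so a .dts can pull in its .dtsi without dtc include-path support.
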
diff --git a/recipes-kernel/linux/linux-machine-kconfig.inc b/recipes-kernel/linux/linux-machine-kconfig.inc
new file mode 100644
index 00000000..881847d0
--- /dev/null
+++ b/recipes-kernel/linux/linux-machine-kconfig.inc
@@ -0,0 +1,21 @@
1#
2# This include file implements the merging of *.cfg files from the SRC_URI for
3# the Kernel.
4#
5
6# Returns all the elements from SRC_URI that are .cfg files
7def find_config_fragments(d):
8 sources=src_patches(d, True)
9 sources_list=[]
10 for s in sources:
11 if s.endswith('.cfg'):
12 sources_list.append(s)
13 return sources_list
14
15kernel_do_configure_prepend() {
16 # Find all ".cfg" files and merge them together into a .config
17 CFG_FILES="${@" ".join(find_config_fragments(d))}"
18 if [ ! -z "$CFG_FILES" ]; then
19 ${S}/scripts/kconfig/merge_config.sh -m $CFG_FILES
20 fi
21}
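
The prepend above simply hands every .cfg fragment found in SRC_URI to merge_config.sh. As a sketch, a hypothetical fragment and the line that pulls it into the build (either directly, or indirectly through MACHINE_KCONFIG as shown earlier):

    # conf/machine/boards/common/rtc.cfg (hypothetical fragment)
    CONFIG_RTC_CLASS=y
    CONFIG_RTC_DRV_PCF8563=y

    # pulled in from a bbappend, or via MACHINE_KCONFIG:
    SRC_URI += "file://common/rtc.cfg"
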
diff --git a/recipes-kernel/linux/linux-xlnx-dev.bb b/recipes-kernel/linux/linux-xlnx-dev.bb
new file mode 100644
index 00000000..b1800e9c
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx-dev.bb
@@ -0,0 +1,42 @@
1# This recipe tracks the 'bleeding edge' linux-xlnx repository.
2# Since this tree is frequently updated, AUTOREV is used to track its contents.
3#
4# To enable this recipe, set PREFERRED_PROVIDER_virtual/kernel = "linux-xlnx-dev"
5
6# These variables should reflect the linux-xlnx SRCREV and Linux version of the
7# latest upstream Linux release merge. If these are out of date, please email
8# meta-xilinx@yoctoproject.org to have them corrected. The linux-xlnx-dev build
9# should still function correctly with out-of-date values, but the naming will
10# be misleading.
11LINUX_XLNX_NEARTOP_SRCREV = "669ee45083e22963d7fb7b774c5d7893ed35de2e"
12LINUX_XLNX_NEARTOP_VERSION = "3.10"
13
14# LINUX_VERSION is set to select the defconfig and dts we wish to use within
15# meta-xilinx. If the build fails, it may be because this defconfig is out of
16# date. If that happens, or if the developer wants a non-meta-xilinx config,
17# they should add their own defconfig and name it appropriately so the recipes
18# find the file.
19LINUX_VERSION ?= "3.8"
20
21LINUX_VERSION_EXTENSION ?= "-xilinx-dev"
22
23KBRANCH ?= "master-next"
24KBRANCH_DEFAULT = "master"
25
26include linux-xlnx.inc
27
28# Set default SRCREVs. SRCREVs statically set to prevent network access during
29# parsing. If linux-xlnx-dev is the preferred provider, they will be overridden
30# to AUTOREV in the anonymous python routine and resolved when the variables
31# are finalized.
32SRCREV = "${LINUX_XLNX_NEARTOP_SRCREV}"
33
34python () {
35 if d.getVar("PREFERRED_PROVIDER_virtual/kernel", True) != "linux-xlnx-dev":
36 raise bb.parse.SkipPackage("Set PREFERRED_PROVIDER_virtual/kernel to linux-xlnx-dev to enable it")
37 else:
38 d.setVar("SRCREV", "${AUTOREV}")
39}
40
41PR = "r0"
42PV = "${LINUX_XLNX_NEARTOP_VERSION}+${LINUX_VERSION_EXTENSION}+git${SRCREV}"
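
Because the anonymous Python block skips this recipe unless it is the selected kernel provider, enabling it is a one-line change, e.g. in local.conf:

    PREFERRED_PROVIDER_virtual/kernel = "linux-xlnx-dev"
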
diff --git a/recipes-kernel/linux/linux-xlnx-prt_3.8.bb b/recipes-kernel/linux/linux-xlnx-prt_3.8.bb
new file mode 100644
index 00000000..6e61d83d
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx-prt_3.8.bb
@@ -0,0 +1,22 @@
1KBRANCH = "xlnx_3.8"
2
3# See include file for common information
4include linux-xlnx.inc
5
6PR = "r1"
7
8# Kernel version and SRCREV correspond to: xlnx_3.8 branch
9LINUX_VERSION = "3.8"
10SRCREV = "f4ff79d44a966ebea6229213816d17eb472b303e"
11
12SRC_URI_append = " \
13 file://patch-3.8.11-rt8-xlnx.patch \
14 file://preempt-rt.cfg \
15 file://config-3.8.11-rt8-xlnx \
16 "
17
18do_configure_append () {
19 cd ${B}
20 rm -f .config
21 cp ../config-3.8.11-rt8-xlnx ./.config
22}
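
Selecting this PREEMPT_RT flavour follows the usual Yocto provider/version mechanics; a sketch (assuming the standard selection variables in local.conf or a distro config):

    PREFERRED_PROVIDER_virtual/kernel = "linux-xlnx-prt"
    PREFERRED_VERSION_linux-xlnx-prt = "3.8%"
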
diff --git a/recipes-kernel/linux/linux-xlnx.inc b/recipes-kernel/linux/linux-xlnx.inc
new file mode 100644
index 00000000..3caddc87
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx.inc
@@ -0,0 +1,25 @@
1DESCRIPTION = "Xilinx Kernel"
2SECTION = "kernel"
3LICENSE = "GPLv2"
4
5LIC_FILES_CHKSUM = "file://COPYING;md5=d7810fab7487fb0aad327b76f1be7cd7"
6
7# This version extension should match CONFIG_LOCALVERSION in defconfig
8LINUX_VERSION_EXTENSION ?= "-xilinx"
9PV = "${LINUX_VERSION}${LINUX_VERSION_EXTENSION}+git${SRCREV}"
10
11# Sources
12KBRANCH ?= "master"
13SRC_URI = "git://github.com/Xilinx/linux-xlnx.git;protocol=https;branch=${KBRANCH}"
14FILESEXTRAPATHS_prepend := "${THISDIR}/linux-xlnx:"
15
16# Source Directory
17S = "${WORKDIR}/git"
18
19# Inherit/include base functionality
20inherit kernel
21require linux-machine-common.inc
22require linux-machine-kconfig.inc
23
24# Override COMPATIBLE_MACHINE to include your machine in a bbappend file.
25COMPATIBLE_MACHINE = "qemumicroblaze|qemuzynq|microblaze|zynq"
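
As the last comment notes, a BSP layer widens COMPATIBLE_MACHINE from a bbappend. A minimal sketch for a hypothetical 'myboard' machine, e.g. in a linux-xlnx_3.8.bbappend:

    # Only takes effect when building for the (hypothetical) myboard MACHINE
    COMPATIBLE_MACHINE_myboard = "myboard"
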
diff --git a/recipes-kernel/linux/linux-xlnx/config-3.8.11-rt8-xlnx b/recipes-kernel/linux/linux-xlnx/config-3.8.11-rt8-xlnx
new file mode 100644
index 00000000..7b9a0f61
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx/config-3.8.11-rt8-xlnx
@@ -0,0 +1,2672 @@
1#
2# Automatically generated file; DO NOT EDIT.
3# Linux/arm 3.8.11 Kernel Configuration
4#
5CONFIG_ARM=y
6CONFIG_MIGHT_HAVE_PCI=y
7CONFIG_SYS_SUPPORTS_APM_EMULATION=y
8CONFIG_GENERIC_GPIO=y
9CONFIG_HAVE_PROC_CPU=y
10CONFIG_NO_IOPORT=y
11CONFIG_STACKTRACE_SUPPORT=y
12CONFIG_LOCKDEP_SUPPORT=y
13CONFIG_TRACE_IRQFLAGS_SUPPORT=y
14CONFIG_RWSEM_GENERIC_SPINLOCK=y
15CONFIG_ARCH_HAS_CPUFREQ=y
16CONFIG_GENERIC_HWEIGHT=y
17CONFIG_GENERIC_CALIBRATE_DELAY=y
18CONFIG_NEED_DMA_MAP_STATE=y
19CONFIG_VECTORS_BASE=0xffff0000
20CONFIG_ARM_PATCH_PHYS_VIRT=y
21CONFIG_GENERIC_BUG=y
22CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
23CONFIG_HAVE_IRQ_WORK=y
24CONFIG_IRQ_WORK=y
25CONFIG_BUILDTIME_EXTABLE_SORT=y
26
27#
28# General setup
29#
30CONFIG_EXPERIMENTAL=y
31CONFIG_INIT_ENV_ARG_LIMIT=32
32CONFIG_CROSS_COMPILE=""
33CONFIG_LOCALVERSION="-xilinx"
34CONFIG_LOCALVERSION_AUTO=y
35CONFIG_HAVE_KERNEL_GZIP=y
36CONFIG_HAVE_KERNEL_LZMA=y
37CONFIG_HAVE_KERNEL_XZ=y
38CONFIG_HAVE_KERNEL_LZO=y
39CONFIG_KERNEL_GZIP=y
40# CONFIG_KERNEL_LZMA is not set
41# CONFIG_KERNEL_XZ is not set
42# CONFIG_KERNEL_LZO is not set
43CONFIG_DEFAULT_HOSTNAME="(none)"
44CONFIG_SWAP=y
45CONFIG_SYSVIPC=y
46CONFIG_SYSVIPC_SYSCTL=y
47# CONFIG_POSIX_MQUEUE is not set
48# CONFIG_FHANDLE is not set
49# CONFIG_AUDIT is not set
50CONFIG_HAVE_GENERIC_HARDIRQS=y
51
52#
53# IRQ subsystem
54#
55CONFIG_GENERIC_HARDIRQS=y
56CONFIG_GENERIC_IRQ_PROBE=y
57CONFIG_GENERIC_IRQ_SHOW=y
58CONFIG_HARDIRQS_SW_RESEND=y
59CONFIG_GENERIC_IRQ_CHIP=y
60CONFIG_IRQ_DOMAIN=y
61CONFIG_IRQ_DOMAIN_DEBUG=y
62CONFIG_IRQ_FORCED_THREADING=y
63CONFIG_SPARSE_IRQ=y
64CONFIG_KTIME_SCALAR=y
65CONFIG_GENERIC_CLOCKEVENTS=y
66CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
67CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
68
69#
70# Timers subsystem
71#
72CONFIG_TICK_ONESHOT=y
73CONFIG_NO_HZ=y
74CONFIG_HIGH_RES_TIMERS=y
75
76#
77# CPU/Task time and stats accounting
78#
79CONFIG_TICK_CPU_ACCOUNTING=y
80# CONFIG_BSD_PROCESS_ACCT is not set
81# CONFIG_TASKSTATS is not set
82
83#
84# RCU Subsystem
85#
86CONFIG_TREE_PREEMPT_RCU=y
87CONFIG_PREEMPT_RCU=y
88CONFIG_RCU_FANOUT=32
89CONFIG_RCU_FANOUT_LEAF=16
90# CONFIG_RCU_FANOUT_EXACT is not set
91# CONFIG_TREE_RCU_TRACE is not set
92# CONFIG_RCU_BOOST is not set
93# CONFIG_RCU_NOCB_CPU is not set
94CONFIG_IKCONFIG=y
95CONFIG_IKCONFIG_PROC=y
96CONFIG_LOG_BUF_SHIFT=14
97# CONFIG_CGROUPS is not set
98# CONFIG_CHECKPOINT_RESTORE is not set
99# CONFIG_NAMESPACES is not set
100# CONFIG_SCHED_AUTOGROUP is not set
101CONFIG_SYSFS_DEPRECATED=y
102CONFIG_SYSFS_DEPRECATED_V2=y
103# CONFIG_RELAY is not set
104CONFIG_BLK_DEV_INITRD=y
105CONFIG_INITRAMFS_SOURCE=""
106CONFIG_RD_GZIP=y
107# CONFIG_RD_BZIP2 is not set
108# CONFIG_RD_LZMA is not set
109# CONFIG_RD_XZ is not set
110# CONFIG_RD_LZO is not set
111CONFIG_CC_OPTIMIZE_FOR_SIZE=y
112CONFIG_SYSCTL=y
113CONFIG_ANON_INODES=y
114CONFIG_EXPERT=y
115CONFIG_HAVE_UID16=y
116CONFIG_UID16=y
117CONFIG_SYSCTL_SYSCALL=y
118CONFIG_KALLSYMS=y
119# CONFIG_KALLSYMS_ALL is not set
120CONFIG_HOTPLUG=y
121CONFIG_PRINTK=y
122CONFIG_BUG=y
123CONFIG_ELF_CORE=y
124CONFIG_BASE_FULL=y
125CONFIG_FUTEX=y
126CONFIG_EPOLL=y
127CONFIG_SIGNALFD=y
128CONFIG_TIMERFD=y
129CONFIG_EVENTFD=y
130CONFIG_SHMEM=y
131CONFIG_AIO=y
132CONFIG_EMBEDDED=y
133CONFIG_HAVE_PERF_EVENTS=y
134CONFIG_PERF_USE_VMALLOC=y
135
136#
137# Kernel Performance Events And Counters
138#
139CONFIG_PERF_EVENTS=y
140# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
141CONFIG_VM_EVENT_COUNTERS=y
142CONFIG_PCI_QUIRKS=y
143CONFIG_COMPAT_BRK=y
144CONFIG_SLAB=y
145# CONFIG_SLUB is not set
146# CONFIG_PROFILING is not set
147CONFIG_HAVE_OPROFILE=y
148# CONFIG_KPROBES is not set
149# CONFIG_JUMP_LABEL is not set
150CONFIG_HAVE_KPROBES=y
151CONFIG_HAVE_KRETPROBES=y
152CONFIG_HAVE_ARCH_TRACEHOOK=y
153CONFIG_HAVE_DMA_ATTRS=y
154CONFIG_HAVE_DMA_CONTIGUOUS=y
155CONFIG_USE_GENERIC_SMP_HELPERS=y
156CONFIG_GENERIC_SMP_IDLE_THREAD=y
157CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
158CONFIG_HAVE_CLK=y
159CONFIG_HAVE_DMA_API_DEBUG=y
160CONFIG_HAVE_HW_BREAKPOINT=y
161CONFIG_HAVE_ARCH_JUMP_LABEL=y
162CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y
163CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
164CONFIG_HAVE_MOD_ARCH_SPECIFIC=y
165CONFIG_MODULES_USE_ELF_REL=y
166CONFIG_CLONE_BACKWARDS=y
167
168#
169# GCOV-based kernel profiling
170#
171# CONFIG_GCOV_KERNEL is not set
172CONFIG_HAVE_GENERIC_DMA_COHERENT=y
173CONFIG_SLABINFO=y
174CONFIG_RT_MUTEXES=y
175CONFIG_BASE_SMALL=0
176CONFIG_MODULES=y
177# CONFIG_MODULE_FORCE_LOAD is not set
178CONFIG_MODULE_UNLOAD=y
179CONFIG_MODULE_FORCE_UNLOAD=y
180CONFIG_MODVERSIONS=y
181# CONFIG_MODULE_SRCVERSION_ALL is not set
182# CONFIG_MODULE_SIG is not set
183CONFIG_STOP_MACHINE=y
184CONFIG_BLOCK=y
185CONFIG_LBDAF=y
186# CONFIG_BLK_DEV_BSG is not set
187# CONFIG_BLK_DEV_BSGLIB is not set
188# CONFIG_BLK_DEV_INTEGRITY is not set
189
190#
191# Partition Types
192#
193# CONFIG_PARTITION_ADVANCED is not set
194CONFIG_MSDOS_PARTITION=y
195CONFIG_EFI_PARTITION=y
196
197#
198# IO Schedulers
199#
200CONFIG_IOSCHED_NOOP=y
201CONFIG_IOSCHED_DEADLINE=y
202CONFIG_IOSCHED_CFQ=y
203# CONFIG_DEFAULT_DEADLINE is not set
204CONFIG_DEFAULT_CFQ=y
205# CONFIG_DEFAULT_NOOP is not set
206CONFIG_DEFAULT_IOSCHED="cfq"
207# CONFIG_FREEZER is not set
208
209#
210# System Type
211#
212CONFIG_MMU=y
213CONFIG_ARCH_MULTIPLATFORM=y
214# CONFIG_ARCH_INTEGRATOR is not set
215# CONFIG_ARCH_REALVIEW is not set
216# CONFIG_ARCH_VERSATILE is not set
217# CONFIG_ARCH_AT91 is not set
218# CONFIG_ARCH_BCM2835 is not set
219# CONFIG_ARCH_CNS3XXX is not set
220# CONFIG_ARCH_CLPS711X is not set
221# CONFIG_ARCH_GEMINI is not set
222# CONFIG_ARCH_SIRF is not set
223# CONFIG_ARCH_EBSA110 is not set
224# CONFIG_ARCH_EP93XX is not set
225# CONFIG_ARCH_FOOTBRIDGE is not set
226# CONFIG_ARCH_MXS is not set
227# CONFIG_ARCH_NETX is not set
228# CONFIG_ARCH_H720X is not set
229# CONFIG_ARCH_IOP13XX is not set
230# CONFIG_ARCH_IOP32X is not set
231# CONFIG_ARCH_IOP33X is not set
232# CONFIG_ARCH_IXP4XX is not set
233# CONFIG_ARCH_DOVE is not set
234# CONFIG_ARCH_KIRKWOOD is not set
235# CONFIG_ARCH_MV78XX0 is not set
236# CONFIG_ARCH_ORION5X is not set
237# CONFIG_ARCH_MMP is not set
238# CONFIG_ARCH_KS8695 is not set
239# CONFIG_ARCH_W90X900 is not set
240# CONFIG_ARCH_LPC32XX is not set
241# CONFIG_ARCH_TEGRA is not set
242# CONFIG_ARCH_PXA is not set
243# CONFIG_ARCH_MSM is not set
244# CONFIG_ARCH_SHMOBILE is not set
245# CONFIG_ARCH_RPC is not set
246# CONFIG_ARCH_SA1100 is not set
247# CONFIG_ARCH_S3C24XX is not set
248# CONFIG_ARCH_S3C64XX is not set
249# CONFIG_ARCH_S5P64X0 is not set
250# CONFIG_ARCH_S5PC100 is not set
251# CONFIG_ARCH_S5PV210 is not set
252# CONFIG_ARCH_EXYNOS is not set
253# CONFIG_ARCH_SHARK is not set
254# CONFIG_ARCH_U300 is not set
255# CONFIG_ARCH_U8500 is not set
256# CONFIG_ARCH_NOMADIK is not set
257# CONFIG_PLAT_SPEAR is not set
258# CONFIG_ARCH_DAVINCI is not set
259# CONFIG_ARCH_OMAP is not set
260# CONFIG_ARCH_VT8500_SINGLE is not set
261
262#
263# Multiple platform selection
264#
265
266#
267# CPU Core family selection
268#
269# CONFIG_ARCH_MULTI_V6 is not set
270CONFIG_ARCH_MULTI_V7=y
271CONFIG_ARCH_MULTI_V6_V7=y
272# CONFIG_ARCH_MULTI_CPU_AUTO is not set
273# CONFIG_ARCH_MVEBU is not set
274# CONFIG_ARCH_BCM is not set
275# CONFIG_GPIO_PCA953X is not set
276# CONFIG_KEYBOARD_GPIO_POLLED is not set
277# CONFIG_ARCH_HIGHBANK is not set
278# CONFIG_ARCH_MXC is not set
279# CONFIG_ARCH_SOCFPGA is not set
280# CONFIG_ARCH_SUNXI is not set
281CONFIG_ARCH_VEXPRESS=y
282
283#
284# Versatile Express platform type
285#
286CONFIG_ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA=y
287# CONFIG_ARCH_VEXPRESS_CA9X4 is not set
288CONFIG_PLAT_VERSATILE_CLCD=y
289CONFIG_PLAT_VERSATILE_SCHED_CLOCK=y
290# CONFIG_ARCH_VT8500 is not set
291CONFIG_ARCH_ZYNQ=y
292
293#
294# Xilinx Specific Options
295#
296CONFIG_XILINX_L1_PREFETCH=y
297CONFIG_XILINX_L2_PREFETCH=y
298CONFIG_DMA_ENGINE=y
299CONFIG_XILINX_AXIPCIE=y
300CONFIG_PLAT_VERSATILE=y
301CONFIG_ARM_TIMER_SP804=y
302
303#
304# Processor Type
305#
306CONFIG_CPU_V7=y
307CONFIG_CPU_32v6K=y
308CONFIG_CPU_32v7=y
309CONFIG_CPU_ABRT_EV7=y
310CONFIG_CPU_PABRT_V7=y
311CONFIG_CPU_CACHE_V7=y
312CONFIG_CPU_CACHE_VIPT=y
313CONFIG_CPU_COPY_V6=y
314CONFIG_CPU_TLB_V7=y
315CONFIG_CPU_HAS_ASID=y
316CONFIG_CPU_CP15=y
317CONFIG_CPU_CP15_MMU=y
318
319#
320# Processor Features
321#
322# CONFIG_ARM_LPAE is not set
323# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set
324CONFIG_ARM_THUMB=y
325# CONFIG_ARM_THUMBEE is not set
326# CONFIG_ARM_VIRT_EXT is not set
327CONFIG_SWP_EMULATE=y
328# CONFIG_CPU_ICACHE_DISABLE is not set
329# CONFIG_CPU_DCACHE_DISABLE is not set
330# CONFIG_CPU_BPREDICT_DISABLE is not set
331CONFIG_OUTER_CACHE=y
332CONFIG_OUTER_CACHE_SYNC=y
333CONFIG_MIGHT_HAVE_CACHE_L2X0=y
334CONFIG_CACHE_L2X0=y
335CONFIG_CACHE_PL310=y
336CONFIG_ARM_L1_CACHE_SHIFT_6=y
337CONFIG_ARM_L1_CACHE_SHIFT=6
338CONFIG_ARM_DMA_MEM_BUFFERABLE=y
339CONFIG_ARM_NR_BANKS=8
340CONFIG_MULTI_IRQ_HANDLER=y
341# CONFIG_ARM_ERRATA_430973 is not set
342CONFIG_PL310_ERRATA_588369=y
343CONFIG_ARM_ERRATA_720789=y
344CONFIG_PL310_ERRATA_727915=y
345CONFIG_PL310_ERRATA_753970=y
346# CONFIG_ARM_ERRATA_754322 is not set
347# CONFIG_ARM_ERRATA_754327 is not set
348# CONFIG_ARM_ERRATA_764369 is not set
349CONFIG_PL310_ERRATA_769419=y
350# CONFIG_ARM_ERRATA_775420 is not set
351CONFIG_ARM_GIC=y
352CONFIG_ICST=y
353
354#
355# Bus support
356#
357CONFIG_ARM_AMBA=y
358CONFIG_PCI=y
359CONFIG_PCI_SYSCALL=y
360CONFIG_ARCH_SUPPORTS_MSI=y
361CONFIG_PCI_MSI=y
362# CONFIG_PCI_DEBUG is not set
363# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set
364# CONFIG_PCI_STUB is not set
365# CONFIG_PCI_IOV is not set
366# CONFIG_PCI_PRI is not set
367# CONFIG_PCI_PASID is not set
368# CONFIG_PCCARD is not set
369
370#
371# Kernel Features
372#
373CONFIG_HAVE_SMP=y
374CONFIG_SMP=y
375CONFIG_SMP_ON_UP=y
376CONFIG_ARM_CPU_TOPOLOGY=y
377CONFIG_SCHED_MC=y
378CONFIG_SCHED_SMT=y
379CONFIG_HAVE_ARM_SCU=y
380# CONFIG_ARM_ARCH_TIMER is not set
381CONFIG_HAVE_ARM_TWD=y
382CONFIG_VMSPLIT_3G=y
383# CONFIG_VMSPLIT_2G is not set
384# CONFIG_VMSPLIT_1G is not set
385CONFIG_PAGE_OFFSET=0xC0000000
386CONFIG_NR_CPUS=4
387CONFIG_HOTPLUG_CPU=y
388CONFIG_LOCAL_TIMERS=y
389CONFIG_ARCH_NR_GPIO=1024
390CONFIG_PREEMPT=y
391CONFIG_PREEMPT_RT_BASE=y
392CONFIG_HAVE_PREEMPT_LAZY=y
393CONFIG_PREEMPT_LAZY=y
394# CONFIG_PREEMPT_NONE is not set
395# CONFIG_PREEMPT_VOLUNTARY is not set
396# CONFIG_PREEMPT__LL is not set
397# CONFIG_PREEMPT_RTB is not set
398CONFIG_PREEMPT_RT_FULL=y
399CONFIG_PREEMPT_COUNT=y
400CONFIG_HZ=100
401# CONFIG_THUMB2_KERNEL is not set
402CONFIG_AEABI=y
403# CONFIG_OABI_COMPAT is not set
404# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
405# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
406CONFIG_HAVE_ARCH_PFN_VALID=y
407CONFIG_HIGHMEM=y
408# CONFIG_HIGHPTE is not set
409CONFIG_HW_PERF_EVENTS=y
410CONFIG_SELECT_MEMORY_MODEL=y
411CONFIG_FLATMEM_MANUAL=y
412CONFIG_FLATMEM=y
413CONFIG_FLAT_NODE_MEM_MAP=y
414CONFIG_HAVE_MEMBLOCK=y
415CONFIG_MEMORY_ISOLATION=y
416CONFIG_PAGEFLAGS_EXTENDED=y
417CONFIG_SPLIT_PTLOCK_CPUS=4
418# CONFIG_COMPACTION is not set
419CONFIG_MIGRATION=y
420# CONFIG_PHYS_ADDR_T_64BIT is not set
421CONFIG_ZONE_DMA_FLAG=0
422CONFIG_BOUNCE=y
423CONFIG_VIRT_TO_BUS=y
424# CONFIG_KSM is not set
425CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
426CONFIG_CROSS_MEMORY_ATTACH=y
427# CONFIG_CLEANCACHE is not set
428# CONFIG_FRONTSWAP is not set
429CONFIG_FORCE_MAX_ZONEORDER=11
430CONFIG_ALIGNMENT_TRAP=y
431# CONFIG_UACCESS_WITH_MEMCPY is not set
432# CONFIG_SECCOMP is not set
433# CONFIG_CC_STACKPROTECTOR is not set
434# CONFIG_XEN is not set
435
436#
437# Boot options
438#
439CONFIG_USE_OF=y
440CONFIG_ATAGS=y
441# CONFIG_DEPRECATED_PARAM_STRUCT is not set
442CONFIG_ZBOOT_ROM_TEXT=0x0
443CONFIG_ZBOOT_ROM_BSS=0x0
444# CONFIG_ARM_APPENDED_DTB is not set
445CONFIG_CMDLINE="console=ttyPS0,115200n8 root=/dev/ram rw initrd=0x00800000,16M earlyprintk mtdparts=physmap-flash.0:512K(nor-fsbl),512K(nor-u-boot),5M(nor-linux),9M(nor-user),1M(nor-scratch),-(nor-rootfs)"
446CONFIG_CMDLINE_FROM_BOOTLOADER=y
447# CONFIG_CMDLINE_EXTEND is not set
448# CONFIG_CMDLINE_FORCE is not set
449# CONFIG_KEXEC is not set
450# CONFIG_CRASH_DUMP is not set
451CONFIG_AUTO_ZRELADDR=y
452
453#
454# CPU Power Management
455#
456
457#
458# CPU Frequency scaling
459#
460CONFIG_CPU_FREQ=y
461CONFIG_CPU_FREQ_TABLE=y
462CONFIG_CPU_FREQ_GOV_COMMON=y
463CONFIG_CPU_FREQ_STAT=y
464CONFIG_CPU_FREQ_STAT_DETAILS=y
465# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
466# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
467CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
468# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
469# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
470CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
471CONFIG_CPU_FREQ_GOV_POWERSAVE=y
472CONFIG_CPU_FREQ_GOV_USERSPACE=y
473CONFIG_CPU_FREQ_GOV_ONDEMAND=y
474CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
475
476#
477# ARM CPU frequency scaling drivers
478#
479# CONFIG_ARM_EXYNOS4210_CPUFREQ is not set
480# CONFIG_ARM_EXYNOS4X12_CPUFREQ is not set
481# CONFIG_ARM_EXYNOS5250_CPUFREQ is not set
482CONFIG_ARM_ZYNQ_CPUFREQ=y
483# CONFIG_CPU_IDLE is not set
484# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set
485
486#
487# Floating point emulation
488#
489
490#
491# At least one emulation must be selected
492#
493CONFIG_VFP=y
494CONFIG_VFPv3=y
495CONFIG_NEON=y
496
497#
498# Userspace binary formats
499#
500CONFIG_BINFMT_ELF=y
501CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE=y
502# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
503CONFIG_HAVE_AOUT=y
504# CONFIG_BINFMT_AOUT is not set
505# CONFIG_BINFMT_MISC is not set
506CONFIG_COREDUMP=y
507
508#
509# Power management options
510#
511# CONFIG_SUSPEND is not set
512CONFIG_PM_RUNTIME=y
513CONFIG_PM=y
514# CONFIG_PM_DEBUG is not set
515# CONFIG_APM_EMULATION is not set
516CONFIG_ARCH_HAS_OPP=y
517CONFIG_PM_OPP=y
518CONFIG_PM_CLK=y
519CONFIG_ARCH_SUSPEND_POSSIBLE=y
520# CONFIG_ARM_CPU_SUSPEND is not set
521CONFIG_NET=y
522
523#
524# Networking options
525#
526CONFIG_PACKET=y
527# CONFIG_PACKET_DIAG is not set
528CONFIG_UNIX=y
529# CONFIG_UNIX_DIAG is not set
530CONFIG_XFRM=y
531# CONFIG_XFRM_USER is not set
532# CONFIG_XFRM_SUB_POLICY is not set
533# CONFIG_XFRM_MIGRATE is not set
534# CONFIG_XFRM_STATISTICS is not set
535# CONFIG_NET_KEY is not set
536CONFIG_INET=y
537CONFIG_IP_MULTICAST=y
538# CONFIG_IP_ADVANCED_ROUTER is not set
539CONFIG_IP_PNP=y
540CONFIG_IP_PNP_DHCP=y
541CONFIG_IP_PNP_BOOTP=y
542CONFIG_IP_PNP_RARP=y
543CONFIG_NET_IPIP=m
544# CONFIG_NET_IPGRE_DEMUX is not set
545# CONFIG_IP_MROUTE is not set
546# CONFIG_ARPD is not set
547# CONFIG_SYN_COOKIES is not set
548# CONFIG_NET_IPVTI is not set
549# CONFIG_INET_AH is not set
550# CONFIG_INET_ESP is not set
551# CONFIG_INET_IPCOMP is not set
552# CONFIG_INET_XFRM_TUNNEL is not set
553CONFIG_INET_TUNNEL=m
554CONFIG_INET_XFRM_MODE_TRANSPORT=y
555CONFIG_INET_XFRM_MODE_TUNNEL=y
556CONFIG_INET_XFRM_MODE_BEET=y
557CONFIG_INET_LRO=y
558CONFIG_INET_DIAG=y
559CONFIG_INET_TCP_DIAG=y
560# CONFIG_INET_UDP_DIAG is not set
561# CONFIG_TCP_CONG_ADVANCED is not set
562CONFIG_TCP_CONG_CUBIC=y
563CONFIG_DEFAULT_TCP_CONG="cubic"
564# CONFIG_TCP_MD5SIG is not set
565CONFIG_IPV6=m
566# CONFIG_IPV6_PRIVACY is not set
567# CONFIG_IPV6_ROUTER_PREF is not set
568# CONFIG_IPV6_OPTIMISTIC_DAD is not set
569# CONFIG_INET6_AH is not set
570# CONFIG_INET6_ESP is not set
571# CONFIG_INET6_IPCOMP is not set
572# CONFIG_IPV6_MIP6 is not set
573# CONFIG_INET6_XFRM_TUNNEL is not set
574# CONFIG_INET6_TUNNEL is not set
575CONFIG_INET6_XFRM_MODE_TRANSPORT=m
576CONFIG_INET6_XFRM_MODE_TUNNEL=m
577CONFIG_INET6_XFRM_MODE_BEET=m
578# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
579CONFIG_IPV6_SIT=m
580# CONFIG_IPV6_SIT_6RD is not set
581CONFIG_IPV6_NDISC_NODETYPE=y
582# CONFIG_IPV6_TUNNEL is not set
583# CONFIG_IPV6_GRE is not set
584# CONFIG_IPV6_MULTIPLE_TABLES is not set
585# CONFIG_IPV6_MROUTE is not set
586# CONFIG_NETWORK_SECMARK is not set
587# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
588# CONFIG_NETFILTER is not set
589# CONFIG_IP_DCCP is not set
590# CONFIG_IP_SCTP is not set
591# CONFIG_RDS is not set
592# CONFIG_TIPC is not set
593# CONFIG_ATM is not set
594# CONFIG_L2TP is not set
595# CONFIG_BRIDGE is not set
596CONFIG_HAVE_NET_DSA=y
597CONFIG_VLAN_8021Q=m
598# CONFIG_VLAN_8021Q_GVRP is not set
599# CONFIG_DECNET is not set
600# CONFIG_LLC2 is not set
601# CONFIG_IPX is not set
602# CONFIG_ATALK is not set
603# CONFIG_X25 is not set
604# CONFIG_LAPB is not set
605# CONFIG_WAN_ROUTER is not set
606# CONFIG_PHONET is not set
607# CONFIG_IEEE802154 is not set
608# CONFIG_NET_SCHED is not set
609# CONFIG_DCB is not set
610# CONFIG_BATMAN_ADV is not set
611# CONFIG_OPENVSWITCH is not set
612CONFIG_RPS=y
613CONFIG_RFS_ACCEL=y
614CONFIG_XPS=y
615CONFIG_BQL=y
616# CONFIG_BPF_JIT is not set
617
618#
619# Network testing
620#
621# CONFIG_NET_PKTGEN is not set
622# CONFIG_HAMRADIO is not set
623# CONFIG_CAN is not set
624# CONFIG_IRDA is not set
625# CONFIG_BT is not set
626# CONFIG_AF_RXRPC is not set
627CONFIG_WIRELESS=y
628# CONFIG_CFG80211 is not set
629# CONFIG_LIB80211 is not set
630
631#
632# CFG80211 needs to be enabled for MAC80211
633#
634# CONFIG_WIMAX is not set
635# CONFIG_RFKILL is not set
636# CONFIG_NET_9P is not set
637# CONFIG_CAIF is not set
638# CONFIG_CEPH_LIB is not set
639# CONFIG_NFC is not set
640CONFIG_HAVE_BPF_JIT=y
641
642#
643# Device Drivers
644#
645
646#
647# Generic Driver Options
648#
649CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
650CONFIG_DEVTMPFS=y
651CONFIG_DEVTMPFS_MOUNT=y
652CONFIG_STANDALONE=y
653CONFIG_PREVENT_FIRMWARE_BUILD=y
654CONFIG_FW_LOADER=y
655CONFIG_FIRMWARE_IN_KERNEL=y
656CONFIG_EXTRA_FIRMWARE=""
657# CONFIG_DEBUG_DRIVER is not set
658# CONFIG_DEBUG_DEVRES is not set
659# CONFIG_SYS_HYPERVISOR is not set
660# CONFIG_GENERIC_CPU_DEVICES is not set
661# CONFIG_DMA_SHARED_BUFFER is not set
662CONFIG_CMA=y
663# CONFIG_CMA_DEBUG is not set
664
665#
666# Default contiguous memory area size:
667#
668CONFIG_CMA_SIZE_MBYTES=16
669CONFIG_CMA_SIZE_SEL_MBYTES=y
670# CONFIG_CMA_SIZE_SEL_PERCENTAGE is not set
671# CONFIG_CMA_SIZE_SEL_MIN is not set
672# CONFIG_CMA_SIZE_SEL_MAX is not set
673CONFIG_CMA_ALIGNMENT=8
674CONFIG_CMA_AREAS=7
675
676#
677# Bus devices
678#
679CONFIG_CONNECTOR=y
680CONFIG_PROC_EVENTS=y
681CONFIG_MTD=y
682# CONFIG_MTD_TESTS is not set
683# CONFIG_MTD_REDBOOT_PARTS is not set
684CONFIG_MTD_CMDLINE_PARTS=y
685# CONFIG_MTD_AFS_PARTS is not set
686CONFIG_MTD_OF_PARTS=y
687# CONFIG_MTD_AR7_PARTS is not set
688
689#
690# User Modules And Translation Layers
691#
692CONFIG_MTD_CHAR=y
693CONFIG_MTD_BLKDEVS=y
694CONFIG_MTD_BLOCK=y
695# CONFIG_FTL is not set
696# CONFIG_NFTL is not set
697# CONFIG_INFTL is not set
698# CONFIG_RFD_FTL is not set
699# CONFIG_SSFDC is not set
700# CONFIG_SM_FTL is not set
701# CONFIG_MTD_OOPS is not set
702# CONFIG_MTD_SWAP is not set
703
704#
705# RAM/ROM/Flash chip drivers
706#
707CONFIG_MTD_CFI=y
708# CONFIG_MTD_JEDECPROBE is not set
709CONFIG_MTD_GEN_PROBE=y
710# CONFIG_MTD_CFI_ADV_OPTIONS is not set
711CONFIG_MTD_MAP_BANK_WIDTH_1=y
712CONFIG_MTD_MAP_BANK_WIDTH_2=y
713CONFIG_MTD_MAP_BANK_WIDTH_4=y
714# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
715# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
716# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
717CONFIG_MTD_CFI_I1=y
718CONFIG_MTD_CFI_I2=y
719# CONFIG_MTD_CFI_I4 is not set
720# CONFIG_MTD_CFI_I8 is not set
721# CONFIG_MTD_CFI_INTELEXT is not set
722CONFIG_MTD_CFI_AMDSTD=y
723# CONFIG_MTD_CFI_STAA is not set
724CONFIG_MTD_CFI_UTIL=y
725# CONFIG_MTD_RAM is not set
726# CONFIG_MTD_ROM is not set
727# CONFIG_MTD_ABSENT is not set
728
729#
730# Mapping drivers for chip access
731#
732# CONFIG_MTD_COMPLEX_MAPPINGS is not set
733CONFIG_MTD_PHYSMAP=y
734# CONFIG_MTD_PHYSMAP_COMPAT is not set
735CONFIG_MTD_PHYSMAP_OF=y
736# CONFIG_MTD_INTEL_VR_NOR is not set
737# CONFIG_MTD_PLATRAM is not set
738
739#
740# Self-contained MTD device drivers
741#
742# CONFIG_MTD_PMC551 is not set
743# CONFIG_MTD_DATAFLASH is not set
744CONFIG_MTD_M25P80=y
745# CONFIG_M25PXX_USE_FAST_READ is not set
746# CONFIG_MTD_SST25L is not set
747# CONFIG_MTD_SLRAM is not set
748# CONFIG_MTD_PHRAM is not set
749# CONFIG_MTD_MTDRAM is not set
750# CONFIG_MTD_BLOCK2MTD is not set
751
752#
753# Disk-On-Chip Device Drivers
754#
755# CONFIG_MTD_DOC2000 is not set
756# CONFIG_MTD_DOC2001 is not set
757# CONFIG_MTD_DOC2001PLUS is not set
758# CONFIG_MTD_DOCG3 is not set
759CONFIG_MTD_NAND_ECC=y
760# CONFIG_MTD_NAND_ECC_SMC is not set
761CONFIG_MTD_NAND=y
762# CONFIG_MTD_NAND_ECC_BCH is not set
763# CONFIG_MTD_SM_COMMON is not set
764# CONFIG_MTD_NAND_MUSEUM_IDS is not set
765# CONFIG_MTD_NAND_DENALI is not set
766# CONFIG_MTD_NAND_GPIO is not set
767CONFIG_MTD_NAND_IDS=y
768# CONFIG_MTD_NAND_RICOH is not set
769# CONFIG_MTD_NAND_DISKONCHIP is not set
770# CONFIG_MTD_NAND_DOCG4 is not set
771# CONFIG_MTD_NAND_CAFE is not set
772# CONFIG_MTD_NAND_NANDSIM is not set
773# CONFIG_MTD_NAND_PLATFORM is not set
774# CONFIG_MTD_ALAUDA is not set
775CONFIG_MTD_NAND_XILINX_PS=y
776# CONFIG_MTD_ONENAND is not set
777
778#
779# LPDDR flash memory drivers
780#
781# CONFIG_MTD_LPDDR is not set
782# CONFIG_MTD_UBI is not set
783CONFIG_DTC=y
784CONFIG_OF=y
785
786#
787# Device Tree and Open Firmware support
788#
789CONFIG_PROC_DEVICETREE=y
790# CONFIG_OF_SELFTEST is not set
791CONFIG_OF_FLATTREE=y
792CONFIG_OF_EARLY_FLATTREE=y
793CONFIG_OF_ADDRESS=y
794CONFIG_OF_IRQ=y
795CONFIG_OF_DEVICE=y
796CONFIG_OF_I2C=y
797CONFIG_OF_NET=y
798CONFIG_OF_MDIO=y
799CONFIG_OF_PCI=y
800CONFIG_OF_PCI_IRQ=y
801CONFIG_OF_MTD=y
802# CONFIG_PARPORT is not set
803CONFIG_BLK_DEV=y
804# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
805# CONFIG_BLK_CPQ_DA is not set
806# CONFIG_BLK_CPQ_CISS_DA is not set
807# CONFIG_BLK_DEV_DAC960 is not set
808# CONFIG_BLK_DEV_UMEM is not set
809# CONFIG_BLK_DEV_COW_COMMON is not set
810CONFIG_BLK_DEV_LOOP=y
811CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
812# CONFIG_BLK_DEV_CRYPTOLOOP is not set
813# CONFIG_BLK_DEV_DRBD is not set
814# CONFIG_BLK_DEV_NBD is not set
815# CONFIG_BLK_DEV_NVME is not set
816# CONFIG_BLK_DEV_SX8 is not set
817CONFIG_BLK_DEV_RAM=y
818CONFIG_BLK_DEV_RAM_COUNT=16
819CONFIG_BLK_DEV_RAM_SIZE=16384
820# CONFIG_BLK_DEV_XIP is not set
821# CONFIG_CDROM_PKTCDVD is not set
822# CONFIG_ATA_OVER_ETH is not set
823# CONFIG_MG_DISK is not set
824# CONFIG_VIRTIO_BLK is not set
825# CONFIG_BLK_DEV_RBD is not set
826
827#
828# Misc devices
829#
830# CONFIG_SENSORS_LIS3LV02D is not set
831# CONFIG_AD525X_DPOT is not set
832# CONFIG_ATMEL_PWM is not set
833# CONFIG_PHANTOM is not set
834# CONFIG_INTEL_MID_PTI is not set
835# CONFIG_SGI_IOC4 is not set
836# CONFIG_TIFM_CORE is not set
837# CONFIG_ICS932S401 is not set
838# CONFIG_ENCLOSURE_SERVICES is not set
839# CONFIG_HP_ILO is not set
840# CONFIG_APDS9802ALS is not set
841# CONFIG_ISL29003 is not set
842# CONFIG_ISL29020 is not set
843# CONFIG_SENSORS_TSL2550 is not set
844# CONFIG_SENSORS_BH1780 is not set
845# CONFIG_SENSORS_BH1770 is not set
846# CONFIG_SENSORS_APDS990X is not set
847# CONFIG_HMC6352 is not set
848# CONFIG_DS1682 is not set
849# CONFIG_TI_DAC7512 is not set
850# CONFIG_ARM_CHARLCD is not set
851# CONFIG_BMP085_I2C is not set
852# CONFIG_BMP085_SPI is not set
853# CONFIG_PCH_PHUB is not set
854# CONFIG_USB_SWITCH_FSA9480 is not set
855CONFIG_SI570=y
856# CONFIG_C2PORT is not set
857
858#
859# EEPROM support
860#
861CONFIG_EEPROM_AT24=y
862CONFIG_EEPROM_AT25=y
863# CONFIG_EEPROM_LEGACY is not set
864# CONFIG_EEPROM_MAX6875 is not set
865# CONFIG_EEPROM_93CX6 is not set
866# CONFIG_EEPROM_93XX46 is not set
867# CONFIG_CB710_CORE is not set
868
869#
870# Texas Instruments shared transport line discipline
871#
872# CONFIG_TI_ST is not set
873# CONFIG_SENSORS_LIS3_SPI is not set
874# CONFIG_SENSORS_LIS3_I2C is not set
875
876#
877# Altera FPGA firmware download module
878#
879# CONFIG_ALTERA_STAPL is not set
880CONFIG_HAVE_IDE=y
881# CONFIG_IDE is not set
882
883#
884# SCSI device support
885#
886CONFIG_SCSI_MOD=y
887# CONFIG_RAID_ATTRS is not set
888CONFIG_SCSI=y
889CONFIG_SCSI_DMA=y
890# CONFIG_SCSI_TGT is not set
891# CONFIG_SCSI_NETLINK is not set
892CONFIG_SCSI_PROC_FS=y
893
894#
895# SCSI support type (disk, tape, CD-ROM)
896#
897CONFIG_BLK_DEV_SD=y
898# CONFIG_CHR_DEV_ST is not set
899# CONFIG_CHR_DEV_OSST is not set
900# CONFIG_BLK_DEV_SR is not set
901CONFIG_CHR_DEV_SG=y
902# CONFIG_CHR_DEV_SCH is not set
903CONFIG_SCSI_MULTI_LUN=y
904# CONFIG_SCSI_CONSTANTS is not set
905# CONFIG_SCSI_LOGGING is not set
906# CONFIG_SCSI_SCAN_ASYNC is not set
907
908#
909# SCSI Transports
910#
911# CONFIG_SCSI_SPI_ATTRS is not set
912# CONFIG_SCSI_FC_ATTRS is not set
913# CONFIG_SCSI_ISCSI_ATTRS is not set
914# CONFIG_SCSI_SAS_ATTRS is not set
915# CONFIG_SCSI_SAS_LIBSAS is not set
916# CONFIG_SCSI_SRP_ATTRS is not set
917CONFIG_SCSI_LOWLEVEL=y
918# CONFIG_ISCSI_TCP is not set
919# CONFIG_ISCSI_BOOT_SYSFS is not set
920# CONFIG_SCSI_CXGB3_ISCSI is not set
921# CONFIG_SCSI_CXGB4_ISCSI is not set
922# CONFIG_SCSI_BNX2_ISCSI is not set
923# CONFIG_SCSI_BNX2X_FCOE is not set
924# CONFIG_BE2ISCSI is not set
925# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
926# CONFIG_SCSI_HPSA is not set
927# CONFIG_SCSI_3W_9XXX is not set
928# CONFIG_SCSI_3W_SAS is not set
929# CONFIG_SCSI_ACARD is not set
930# CONFIG_SCSI_AACRAID is not set
931# CONFIG_SCSI_AIC7XXX is not set
932# CONFIG_SCSI_AIC7XXX_OLD is not set
933# CONFIG_SCSI_AIC79XX is not set
934# CONFIG_SCSI_AIC94XX is not set
935# CONFIG_SCSI_MVSAS is not set
936# CONFIG_SCSI_MVUMI is not set
937# CONFIG_SCSI_DPT_I2O is not set
938# CONFIG_SCSI_ADVANSYS is not set
939# CONFIG_SCSI_ARCMSR is not set
940# CONFIG_MEGARAID_NEWGEN is not set
941# CONFIG_MEGARAID_LEGACY is not set
942# CONFIG_MEGARAID_SAS is not set
943# CONFIG_SCSI_MPT2SAS is not set
944# CONFIG_SCSI_MPT3SAS is not set
945# CONFIG_SCSI_UFSHCD is not set
946# CONFIG_SCSI_HPTIOP is not set
947# CONFIG_LIBFC is not set
948# CONFIG_LIBFCOE is not set
949# CONFIG_FCOE is not set
950# CONFIG_SCSI_DMX3191D is not set
951# CONFIG_SCSI_FUTURE_DOMAIN is not set
952# CONFIG_SCSI_IPS is not set
953# CONFIG_SCSI_INITIO is not set
954# CONFIG_SCSI_INIA100 is not set
955# CONFIG_SCSI_STEX is not set
956# CONFIG_SCSI_SYM53C8XX_2 is not set
957# CONFIG_SCSI_QLOGIC_1280 is not set
958# CONFIG_SCSI_QLA_FC is not set
959# CONFIG_SCSI_QLA_ISCSI is not set
960# CONFIG_SCSI_LPFC is not set
961# CONFIG_SCSI_DC395x is not set
962# CONFIG_SCSI_DC390T is not set
963# CONFIG_SCSI_NSP32 is not set
964# CONFIG_SCSI_DEBUG is not set
965# CONFIG_SCSI_PMCRAID is not set
966# CONFIG_SCSI_PM8001 is not set
967# CONFIG_SCSI_SRP is not set
968# CONFIG_SCSI_BFA_FC is not set
969# CONFIG_SCSI_VIRTIO is not set
970# CONFIG_SCSI_CHELSIO_FCOE is not set
971# CONFIG_SCSI_DH is not set
972# CONFIG_SCSI_OSD_INITIATOR is not set
973CONFIG_HAVE_PATA_PLATFORM=y
974# CONFIG_ATA is not set
975# CONFIG_MD is not set
976# CONFIG_TARGET_CORE is not set
977# CONFIG_FUSION is not set
978
979#
980# IEEE 1394 (FireWire) support
981#
982# CONFIG_FIREWIRE is not set
983# CONFIG_FIREWIRE_NOSY is not set
984# CONFIG_I2O is not set
985CONFIG_NETDEVICES=y
986CONFIG_NET_CORE=y
987# CONFIG_BONDING is not set
988# CONFIG_DUMMY is not set
989# CONFIG_EQUALIZER is not set
990# CONFIG_NET_FC is not set
991CONFIG_MII=y
992# CONFIG_NET_TEAM is not set
993# CONFIG_MACVLAN is not set
994# CONFIG_VXLAN is not set
995# CONFIG_NETPOLL is not set
996# CONFIG_NET_POLL_CONTROLLER is not set
997# CONFIG_TUN is not set
998# CONFIG_VETH is not set
999# CONFIG_VIRTIO_NET is not set
1000# CONFIG_ARCNET is not set
1001
1002#
1003# CAIF transport drivers
1004#
1005
1006#
1007# Distributed Switch Architecture drivers
1008#
1009# CONFIG_NET_DSA_MV88E6XXX is not set
1010# CONFIG_NET_DSA_MV88E6060 is not set
1011# CONFIG_NET_DSA_MV88E6XXX_NEED_PPU is not set
1012# CONFIG_NET_DSA_MV88E6131 is not set
1013# CONFIG_NET_DSA_MV88E6123_61_65 is not set
1014CONFIG_ETHERNET=y
1015# CONFIG_NET_VENDOR_3COM is not set
1016# CONFIG_NET_VENDOR_ADAPTEC is not set
1017# CONFIG_NET_VENDOR_ALTEON is not set
1018# CONFIG_NET_VENDOR_AMD is not set
1019# CONFIG_NET_VENDOR_ATHEROS is not set
1020CONFIG_NET_CADENCE=y
1021# CONFIG_ARM_AT91_ETHER is not set
1022# CONFIG_MACB is not set
1023CONFIG_NET_VENDOR_BROADCOM=y
1024# CONFIG_B44 is not set
1025# CONFIG_BNX2 is not set
1026# CONFIG_CNIC is not set
1027CONFIG_TIGON3=y
1028# CONFIG_BNX2X is not set
1029# CONFIG_NET_VENDOR_BROCADE is not set
1030# CONFIG_NET_CALXEDA_XGMAC is not set
1031# CONFIG_NET_VENDOR_CHELSIO is not set
1032# CONFIG_NET_VENDOR_CIRRUS is not set
1033# CONFIG_NET_VENDOR_CISCO is not set
1034# CONFIG_DM9000 is not set
1035# CONFIG_DNET is not set
1036# CONFIG_NET_VENDOR_DEC is not set
1037# CONFIG_NET_VENDOR_DLINK is not set
1038# CONFIG_NET_VENDOR_EMULEX is not set
1039# CONFIG_NET_VENDOR_EXAR is not set
1040# CONFIG_NET_VENDOR_FARADAY is not set
1041# CONFIG_NET_VENDOR_HP is not set
1042CONFIG_NET_VENDOR_INTEL=y
1043# CONFIG_E100 is not set
1044# CONFIG_E1000 is not set
1045CONFIG_E1000E=y
1046# CONFIG_IGB is not set
1047# CONFIG_IGBVF is not set
1048# CONFIG_IXGB is not set
1049# CONFIG_IXGBE is not set
1050# CONFIG_IXGBEVF is not set
1051CONFIG_NET_VENDOR_I825XX=y
1052# CONFIG_IP1000 is not set
1053# CONFIG_JME is not set
1054# CONFIG_NET_VENDOR_MARVELL is not set
1055# CONFIG_NET_VENDOR_MELLANOX is not set
1056# CONFIG_NET_VENDOR_MICREL is not set
1057# CONFIG_NET_VENDOR_MICROCHIP is not set
1058# CONFIG_NET_VENDOR_MYRI is not set
1059# CONFIG_FEALNX is not set
1060# CONFIG_NET_VENDOR_NATSEMI is not set
1061# CONFIG_NET_VENDOR_NVIDIA is not set
1062# CONFIG_NET_VENDOR_OKI is not set
1063# CONFIG_ETHOC is not set
1064# CONFIG_NET_PACKET_ENGINE is not set
1065# CONFIG_NET_VENDOR_QLOGIC is not set
1066CONFIG_NET_VENDOR_REALTEK=y
1067# CONFIG_8139CP is not set
1068# CONFIG_8139TOO is not set
1069CONFIG_R8169=y
1070# CONFIG_NET_VENDOR_RDC is not set
1071# CONFIG_NET_VENDOR_SEEQ is not set
1072# CONFIG_NET_VENDOR_SILAN is not set
1073# CONFIG_NET_VENDOR_SIS is not set
1074# CONFIG_SFC is not set
1075# CONFIG_NET_VENDOR_SMSC is not set
1076# CONFIG_NET_VENDOR_STMICRO is not set
1077# CONFIG_NET_VENDOR_SUN is not set
1078# CONFIG_NET_VENDOR_TEHUTI is not set
1079# CONFIG_NET_VENDOR_TI is not set
1080# CONFIG_NET_VENDOR_VIA is not set
1081# CONFIG_NET_VENDOR_WIZNET is not set
1082CONFIG_NET_VENDOR_XILINX=y
1083CONFIG_XILINX_EMACLITE=y
1084CONFIG_XILINX_AXI_EMAC=y
1085CONFIG_XILINX_PS_EMAC=y
1086# CONFIG_XILINX_PS_EMAC_HWTSTAMP is not set
1087# CONFIG_FDDI is not set
1088# CONFIG_HIPPI is not set
1089CONFIG_PHYLIB=y
1090
1091#
1092# MII PHY device drivers
1093#
1094# CONFIG_AT803X_PHY is not set
1095# CONFIG_AMD_PHY is not set
1096CONFIG_MARVELL_PHY=y
1097# CONFIG_DAVICOM_PHY is not set
1098# CONFIG_QSEMI_PHY is not set
1099# CONFIG_LXT_PHY is not set
1100# CONFIG_CICADA_PHY is not set
1101CONFIG_VITESSE_PHY=y
1102# CONFIG_SMSC_PHY is not set
1103# CONFIG_BROADCOM_PHY is not set
1104# CONFIG_BCM87XX_PHY is not set
1105# CONFIG_ICPLUS_PHY is not set
1106# CONFIG_REALTEK_PHY is not set
1107# CONFIG_NATIONAL_PHY is not set
1108# CONFIG_STE10XP is not set
1109# CONFIG_LSI_ET1011C_PHY is not set
1110# CONFIG_MICREL_PHY is not set
1111# CONFIG_FIXED_PHY is not set
1112CONFIG_MDIO_BITBANG=y
1113# CONFIG_MDIO_GPIO is not set
1114# CONFIG_MDIO_BUS_MUX_GPIO is not set
1115# CONFIG_MDIO_BUS_MUX_MMIOREG is not set
1116# CONFIG_MICREL_KS8995MA is not set
1117# CONFIG_PPP is not set
1118# CONFIG_SLIP is not set
1119
1120#
1121# USB Network Adapters
1122#
1123# CONFIG_USB_CATC is not set
1124# CONFIG_USB_KAWETH is not set
1125# CONFIG_USB_PEGASUS is not set
1126# CONFIG_USB_RTL8150 is not set
1127# CONFIG_USB_USBNET is not set
1128# CONFIG_USB_IPHETH is not set
1129CONFIG_WLAN=y
1130# CONFIG_ATMEL is not set
1131# CONFIG_PRISM54 is not set
1132# CONFIG_USB_ZD1201 is not set
1133# CONFIG_HOSTAP is not set
1134# CONFIG_WL_TI is not set
1135
1136#
1137# Enable WiMAX (Networking options) to see the WiMAX drivers
1138#
1139# CONFIG_WAN is not set
1140# CONFIG_VMXNET3 is not set
1141# CONFIG_ISDN is not set
1142
1143#
1144# Input device support
1145#
1146CONFIG_INPUT=y
1147# CONFIG_INPUT_FF_MEMLESS is not set
1148# CONFIG_INPUT_POLLDEV is not set
1149CONFIG_INPUT_SPARSEKMAP=y
1150# CONFIG_INPUT_MATRIXKMAP is not set
1151
1152#
1153# Userland interfaces
1154#
1155CONFIG_INPUT_MOUSEDEV=y
1156CONFIG_INPUT_MOUSEDEV_PSAUX=y
1157CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
1158CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
1159# CONFIG_INPUT_JOYDEV is not set
1160CONFIG_INPUT_EVDEV=y
1161CONFIG_INPUT_EVBUG=y
1162
1163#
1164# Input Device Drivers
1165#
1166CONFIG_INPUT_KEYBOARD=y
1167# CONFIG_KEYBOARD_ADP5588 is not set
1168# CONFIG_KEYBOARD_ADP5589 is not set
1169CONFIG_KEYBOARD_ATKBD=y
1170# CONFIG_KEYBOARD_QT1070 is not set
1171# CONFIG_KEYBOARD_QT2160 is not set
1172# CONFIG_KEYBOARD_LKKBD is not set
1173# CONFIG_KEYBOARD_GPIO is not set
1174# CONFIG_KEYBOARD_TCA6416 is not set
1175# CONFIG_KEYBOARD_TCA8418 is not set
1176# CONFIG_KEYBOARD_MATRIX is not set
1177# CONFIG_KEYBOARD_LM8333 is not set
1178# CONFIG_KEYBOARD_MAX7359 is not set
1179# CONFIG_KEYBOARD_MCS is not set
1180# CONFIG_KEYBOARD_MPR121 is not set
1181# CONFIG_KEYBOARD_NEWTON is not set
1182# CONFIG_KEYBOARD_OPENCORES is not set
1183# CONFIG_KEYBOARD_SAMSUNG is not set
1184# CONFIG_KEYBOARD_STOWAWAY is not set
1185# CONFIG_KEYBOARD_SUNKBD is not set
1186# CONFIG_KEYBOARD_XTKBD is not set
1187CONFIG_INPUT_MOUSE=y
1188CONFIG_MOUSE_PS2=y
1189CONFIG_MOUSE_PS2_ALPS=y
1190CONFIG_MOUSE_PS2_LOGIPS2PP=y
1191CONFIG_MOUSE_PS2_SYNAPTICS=y
1192CONFIG_MOUSE_PS2_TRACKPOINT=y
1193# CONFIG_MOUSE_PS2_ELANTECH is not set
1194# CONFIG_MOUSE_PS2_SENTELIC is not set
1195# CONFIG_MOUSE_PS2_TOUCHKIT is not set
1196# CONFIG_MOUSE_SERIAL is not set
1197# CONFIG_MOUSE_APPLETOUCH is not set
1198# CONFIG_MOUSE_BCM5974 is not set
1199# CONFIG_MOUSE_VSXXXAA is not set
1200# CONFIG_MOUSE_GPIO is not set
1201# CONFIG_MOUSE_SYNAPTICS_I2C is not set
1202# CONFIG_MOUSE_SYNAPTICS_USB is not set
1203# CONFIG_INPUT_JOYSTICK is not set
1204# CONFIG_INPUT_TABLET is not set
1205# CONFIG_INPUT_TOUCHSCREEN is not set
1206# CONFIG_INPUT_MISC is not set
1207
1208#
1209# Hardware I/O ports
1210#
1211CONFIG_SERIO=y
1212CONFIG_SERIO_SERPORT=y
1213# CONFIG_SERIO_AMBAKMI is not set
1214# CONFIG_SERIO_PCIPS2 is not set
1215CONFIG_SERIO_LIBPS2=y
1216# CONFIG_SERIO_RAW is not set
1217# CONFIG_SERIO_ALTERA_PS2 is not set
1218# CONFIG_SERIO_PS2MULT is not set
1219# CONFIG_SERIO_ARC_PS2 is not set
1220# CONFIG_GAMEPORT is not set
1221
1222#
1223# Character devices
1224#
1225CONFIG_VT=y
1226CONFIG_CONSOLE_TRANSLATIONS=y
1227CONFIG_VT_CONSOLE=y
1228CONFIG_HW_CONSOLE=y
1229CONFIG_VT_HW_CONSOLE_BINDING=y
1230CONFIG_UNIX98_PTYS=y
1231# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
1232# CONFIG_LEGACY_PTYS is not set
1233# CONFIG_SERIAL_NONSTANDARD is not set
1234# CONFIG_NOZOMI is not set
1235# CONFIG_N_GSM is not set
1236# CONFIG_TRACE_SINK is not set
1237# CONFIG_DEVKMEM is not set
1238
1239#
1240# Serial drivers
1241#
1242# CONFIG_SERIAL_8250 is not set
1243
1244#
1245# Non-8250 serial port support
1246#
1247# CONFIG_SERIAL_AMBA_PL010 is not set
1248# CONFIG_SERIAL_AMBA_PL011 is not set
1249# CONFIG_SERIAL_MAX3100 is not set
1250# CONFIG_SERIAL_MAX310X is not set
1251# CONFIG_SERIAL_MFD_HSU is not set
1252# CONFIG_SERIAL_UARTLITE is not set
1253CONFIG_SERIAL_CORE=y
1254CONFIG_SERIAL_CORE_CONSOLE=y
1255# CONFIG_SERIAL_JSM is not set
1256# CONFIG_SERIAL_SCCNXP is not set
1257# CONFIG_SERIAL_TIMBERDALE is not set
1258# CONFIG_SERIAL_ALTERA_JTAGUART is not set
1259# CONFIG_SERIAL_ALTERA_UART is not set
1260# CONFIG_SERIAL_IFX6X60 is not set
1261# CONFIG_SERIAL_PCH_UART is not set
1262CONFIG_SERIAL_XILINX_PS_UART=y
1263CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
1264# CONFIG_SERIAL_ARC is not set
1265# CONFIG_TTY_PRINTK is not set
1266# CONFIG_HVC_DCC is not set
1267# CONFIG_VIRTIO_CONSOLE is not set
1268# CONFIG_IPMI_HANDLER is not set
1269# CONFIG_HW_RANDOM is not set
1270CONFIG_XILINX_DEVCFG=y
1271# CONFIG_R3964 is not set
1272# CONFIG_APPLICOM is not set
1273# CONFIG_RAW_DRIVER is not set
1274# CONFIG_TCG_TPM is not set
1275CONFIG_DEVPORT=y
1276CONFIG_I2C=y
1277CONFIG_I2C_BOARDINFO=y
1278CONFIG_I2C_COMPAT=y
1279CONFIG_I2C_CHARDEV=y
1280CONFIG_I2C_MUX=y
1281
1282#
1283# Multiplexer I2C Chip support
1284#
1285# CONFIG_I2C_MUX_GPIO is not set
1286# CONFIG_I2C_MUX_PCA9541 is not set
1287CONFIG_I2C_MUX_PCA954x=y
1288CONFIG_I2C_HELPER_AUTO=y
1289
1290#
1291# I2C Hardware Bus support
1292#
1293
1294#
1295# PC SMBus host controller drivers
1296#
1297# CONFIG_I2C_ALI1535 is not set
1298# CONFIG_I2C_ALI1563 is not set
1299# CONFIG_I2C_ALI15X3 is not set
1300# CONFIG_I2C_AMD756 is not set
1301# CONFIG_I2C_AMD8111 is not set
1302# CONFIG_I2C_I801 is not set
1303# CONFIG_I2C_ISCH is not set
1304# CONFIG_I2C_PIIX4 is not set
1305# CONFIG_I2C_NFORCE2 is not set
1306# CONFIG_I2C_SIS5595 is not set
1307# CONFIG_I2C_SIS630 is not set
1308# CONFIG_I2C_SIS96X is not set
1309# CONFIG_I2C_VIA is not set
1310# CONFIG_I2C_VIAPRO is not set
1311
1312#
1313# I2C system bus drivers (mostly embedded / system-on-chip)
1314#
1315# CONFIG_I2C_CBUS_GPIO is not set
1316# CONFIG_I2C_DESIGNWARE_PLATFORM is not set
1317# CONFIG_I2C_DESIGNWARE_PCI is not set
1318# CONFIG_I2C_EG20T is not set
1319# CONFIG_I2C_GPIO is not set
1320# CONFIG_I2C_INTEL_MID is not set
1321# CONFIG_I2C_NOMADIK is not set
1322# CONFIG_I2C_OCORES is not set
1323# CONFIG_I2C_PCA_PLATFORM is not set
1324# CONFIG_I2C_PXA_PCI is not set
1325# CONFIG_I2C_SIMTEC is not set
1326# CONFIG_I2C_VERSATILE is not set
1327CONFIG_I2C_XILINX_PS=y
1328# CONFIG_I2C_XILINX is not set
1329
1330#
1331# External I2C/SMBus adapter drivers
1332#
1333# CONFIG_I2C_DIOLAN_U2C is not set
1334# CONFIG_I2C_PARPORT_LIGHT is not set
1335# CONFIG_I2C_TAOS_EVM is not set
1336# CONFIG_I2C_TINY_USB is not set
1337
1338#
1339# Other I2C/SMBus bus drivers
1340#
1341# CONFIG_I2C_STUB is not set
1342# CONFIG_I2C_DEBUG_CORE is not set
1343# CONFIG_I2C_DEBUG_ALGO is not set
1344# CONFIG_I2C_DEBUG_BUS is not set
1345CONFIG_SPI=y
1346# CONFIG_SPI_DEBUG is not set
1347CONFIG_SPI_MASTER=y
1348
1349#
1350# SPI Master Controller Drivers
1351#
1352# CONFIG_SPI_ALTERA is not set
1353# CONFIG_SPI_BITBANG is not set
1354# CONFIG_SPI_GPIO is not set
1355# CONFIG_SPI_OC_TINY is not set
1356# CONFIG_SPI_PL022 is not set
1357# CONFIG_SPI_PXA2XX_PCI is not set
1358# CONFIG_SPI_SC18IS602 is not set
1359# CONFIG_SPI_TOPCLIFF_PCH is not set
1360# CONFIG_SPI_XCOMM is not set
1361# CONFIG_SPI_XILINX is not set
1362CONFIG_SPI_XILINX_PS_QSPI=y
1363CONFIG_SPI_XILINX_PS_SPI=y
1364# CONFIG_SPI_DESIGNWARE is not set
1365
1366#
1367# SPI Protocol Masters
1368#
1369# CONFIG_SPI_SPIDEV is not set
1370# CONFIG_SPI_TLE62X0 is not set
1371# CONFIG_HSI is not set
1372
1373#
1374# PPS support
1375#
1376CONFIG_PPS=y
1377# CONFIG_PPS_DEBUG is not set
1378
1379#
1380# PPS clients support
1381#
1382# CONFIG_PPS_CLIENT_KTIMER is not set
1383# CONFIG_PPS_CLIENT_LDISC is not set
1384# CONFIG_PPS_CLIENT_GPIO is not set
1385
1386#
1387# PPS generators support
1388#
1389
1390#
1391# PTP clock support
1392#
1393CONFIG_PTP_1588_CLOCK=y
1394
1395#
1396# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
1397#
1398# CONFIG_PTP_1588_CLOCK_PCH is not set
1399CONFIG_ARCH_REQUIRE_GPIOLIB=y
1400CONFIG_GPIOLIB=y
1401CONFIG_OF_GPIO=y
1402# CONFIG_DEBUG_GPIO is not set
1403CONFIG_GPIO_SYSFS=y
1404
1405#
1406# Memory mapped GPIO drivers:
1407#
1408# CONFIG_GPIO_GENERIC_PLATFORM is not set
1409# CONFIG_GPIO_EM is not set
1410# CONFIG_GPIO_PL061 is not set
1411# CONFIG_GPIO_TS5500 is not set
1412CONFIG_GPIO_XILINX=y
1413CONFIG_GPIO_XILINX_PS=y
1414# CONFIG_GPIO_VX855 is not set
1415
1416#
1417# I2C GPIO expanders:
1418#
1419# CONFIG_GPIO_MAX7300 is not set
1420# CONFIG_GPIO_MAX732X is not set
1421# CONFIG_GPIO_PCF857X is not set
1422# CONFIG_GPIO_SX150X is not set
1423# CONFIG_GPIO_ADP5588 is not set
1424# CONFIG_GPIO_ADNP is not set
1425
1426#
1427# PCI GPIO expanders:
1428#
1429# CONFIG_GPIO_BT8XX is not set
1430# CONFIG_GPIO_AMD8111 is not set
1431# CONFIG_GPIO_ML_IOH is not set
1432# CONFIG_GPIO_RDC321X is not set
1433
1434#
1435# SPI GPIO expanders:
1436#
1437# CONFIG_GPIO_MAX7301 is not set
1438# CONFIG_GPIO_MCP23S08 is not set
1439# CONFIG_GPIO_MC33880 is not set
1440# CONFIG_GPIO_74X164 is not set
1441
1442#
1443# AC97 GPIO expanders:
1444#
1445
1446#
1447# MODULbus GPIO expanders:
1448#
1449
1450#
1451# USB GPIO expanders:
1452#
1453# CONFIG_W1 is not set
1454# CONFIG_POWER_SUPPLY is not set
1455# CONFIG_POWER_AVS is not set
1456CONFIG_HWMON=y
1457# CONFIG_HWMON_VID is not set
1458# CONFIG_HWMON_DEBUG_CHIP is not set
1459
1460#
1461# Native drivers
1462#
1463# CONFIG_SENSORS_AD7314 is not set
1464# CONFIG_SENSORS_AD7414 is not set
1465# CONFIG_SENSORS_AD7418 is not set
1466# CONFIG_SENSORS_ADCXX is not set
1467# CONFIG_SENSORS_ADM1021 is not set
1468# CONFIG_SENSORS_ADM1025 is not set
1469# CONFIG_SENSORS_ADM1026 is not set
1470# CONFIG_SENSORS_ADM1029 is not set
1471# CONFIG_SENSORS_ADM1031 is not set
1472# CONFIG_SENSORS_ADM9240 is not set
1473# CONFIG_SENSORS_ADT7410 is not set
1474# CONFIG_SENSORS_ADT7411 is not set
1475# CONFIG_SENSORS_ADT7462 is not set
1476# CONFIG_SENSORS_ADT7470 is not set
1477# CONFIG_SENSORS_ADT7475 is not set
1478# CONFIG_SENSORS_ASC7621 is not set
1479# CONFIG_SENSORS_ATXP1 is not set
1480# CONFIG_SENSORS_DS620 is not set
1481# CONFIG_SENSORS_DS1621 is not set
1482# CONFIG_SENSORS_I5K_AMB is not set
1483# CONFIG_SENSORS_F71805F is not set
1484# CONFIG_SENSORS_F71882FG is not set
1485# CONFIG_SENSORS_F75375S is not set
1486# CONFIG_SENSORS_G760A is not set
1487# CONFIG_SENSORS_GL518SM is not set
1488# CONFIG_SENSORS_GL520SM is not set
1489# CONFIG_SENSORS_GPIO_FAN is not set
1490# CONFIG_SENSORS_HIH6130 is not set
1491# CONFIG_SENSORS_IT87 is not set
1492# CONFIG_SENSORS_JC42 is not set
1493# CONFIG_SENSORS_LINEAGE is not set
1494# CONFIG_SENSORS_LM63 is not set
1495# CONFIG_SENSORS_LM70 is not set
1496# CONFIG_SENSORS_LM73 is not set
1497# CONFIG_SENSORS_LM75 is not set
1498# CONFIG_SENSORS_LM77 is not set
1499# CONFIG_SENSORS_LM78 is not set
1500# CONFIG_SENSORS_LM80 is not set
1501# CONFIG_SENSORS_LM83 is not set
1502# CONFIG_SENSORS_LM85 is not set
1503# CONFIG_SENSORS_LM87 is not set
1504# CONFIG_SENSORS_LM90 is not set
1505# CONFIG_SENSORS_LM92 is not set
1506# CONFIG_SENSORS_LM93 is not set
1507# CONFIG_SENSORS_LTC4151 is not set
1508# CONFIG_SENSORS_LTC4215 is not set
1509# CONFIG_SENSORS_LTC4245 is not set
1510# CONFIG_SENSORS_LTC4261 is not set
1511# CONFIG_SENSORS_LM95241 is not set
1512# CONFIG_SENSORS_LM95245 is not set
1513# CONFIG_SENSORS_MAX1111 is not set
1514# CONFIG_SENSORS_MAX16065 is not set
1515# CONFIG_SENSORS_MAX1619 is not set
1516# CONFIG_SENSORS_MAX1668 is not set
1517# CONFIG_SENSORS_MAX197 is not set
1518# CONFIG_SENSORS_MAX6639 is not set
1519# CONFIG_SENSORS_MAX6642 is not set
1520# CONFIG_SENSORS_MAX6650 is not set
1521# CONFIG_SENSORS_MCP3021 is not set
1522# CONFIG_SENSORS_NTC_THERMISTOR is not set
1523# CONFIG_SENSORS_PC87360 is not set
1524# CONFIG_SENSORS_PC87427 is not set
1525# CONFIG_SENSORS_PCF8591 is not set
1526# CONFIG_PMBUS is not set
1527# CONFIG_SENSORS_SHT15 is not set
1528# CONFIG_SENSORS_SHT21 is not set
1529# CONFIG_SENSORS_SIS5595 is not set
1530# CONFIG_SENSORS_SMM665 is not set
1531# CONFIG_SENSORS_DME1737 is not set
1532# CONFIG_SENSORS_EMC1403 is not set
1533# CONFIG_SENSORS_EMC2103 is not set
1534# CONFIG_SENSORS_EMC6W201 is not set
1535# CONFIG_SENSORS_SMSC47M1 is not set
1536# CONFIG_SENSORS_SMSC47M192 is not set
1537# CONFIG_SENSORS_SMSC47B397 is not set
1538# CONFIG_SENSORS_SCH56XX_COMMON is not set
1539# CONFIG_SENSORS_SCH5627 is not set
1540# CONFIG_SENSORS_SCH5636 is not set
1541# CONFIG_SENSORS_ADS1015 is not set
1542# CONFIG_SENSORS_ADS7828 is not set
1543# CONFIG_SENSORS_ADS7871 is not set
1544# CONFIG_SENSORS_AMC6821 is not set
1545# CONFIG_SENSORS_INA2XX is not set
1546# CONFIG_SENSORS_THMC50 is not set
1547# CONFIG_SENSORS_TMP102 is not set
1548# CONFIG_SENSORS_TMP401 is not set
1549# CONFIG_SENSORS_TMP421 is not set
1550# CONFIG_SENSORS_VEXPRESS is not set
1551# CONFIG_SENSORS_VIA686A is not set
1552# CONFIG_SENSORS_VT1211 is not set
1553# CONFIG_SENSORS_VT8231 is not set
1554# CONFIG_SENSORS_W83781D is not set
1555# CONFIG_SENSORS_W83791D is not set
1556# CONFIG_SENSORS_W83792D is not set
1557# CONFIG_SENSORS_W83793 is not set
1558# CONFIG_SENSORS_W83795 is not set
1559# CONFIG_SENSORS_W83L785TS is not set
1560# CONFIG_SENSORS_W83L786NG is not set
1561# CONFIG_SENSORS_W83627HF is not set
1562# CONFIG_SENSORS_W83627EHF is not set
1563CONFIG_SENSORS_XADCPS=y
1564# CONFIG_THERMAL is not set
1565CONFIG_WATCHDOG=y
1566CONFIG_WATCHDOG_CORE=y
1567# CONFIG_WATCHDOG_NOWAYOUT is not set
1568
1569#
1570# Watchdog Device Drivers
1571#
1572# CONFIG_SOFT_WATCHDOG is not set
1573# CONFIG_ARM_SP805_WATCHDOG is not set
1574# CONFIG_DW_WATCHDOG is not set
1575CONFIG_MPCORE_WATCHDOG=y
1576CONFIG_XILINX_PS_WATCHDOG=y
1577# CONFIG_MAX63XX_WATCHDOG is not set
1578# CONFIG_ALIM7101_WDT is not set
1579# CONFIG_I6300ESB_WDT is not set
1580# CONFIG_XILINX_WATCHDOG is not set
1581
1582#
1583# PCI-based Watchdog Cards
1584#
1585# CONFIG_PCIPCWATCHDOG is not set
1586# CONFIG_WDTPCI is not set
1587
1588#
1589# USB-based Watchdog Cards
1590#
1591# CONFIG_USBPCWATCHDOG is not set
1592CONFIG_SSB_POSSIBLE=y
1593
1594#
1595# Sonics Silicon Backplane
1596#
1597# CONFIG_SSB is not set
1598CONFIG_BCMA_POSSIBLE=y
1599
1600#
1601# Broadcom specific AMBA
1602#
1603# CONFIG_BCMA is not set
1604
1605#
1606# Multifunction device drivers
1607#
1608# CONFIG_MFD_CORE is not set
1609# CONFIG_MFD_88PM860X is not set
1610# CONFIG_MFD_88PM800 is not set
1611# CONFIG_MFD_88PM805 is not set
1612# CONFIG_MFD_SM501 is not set
1613# CONFIG_MFD_RTSX_PCI is not set
1614# CONFIG_MFD_ASIC3 is not set
1615# CONFIG_MFD_TI_AM335X_TSCADC is not set
1616# CONFIG_HTC_EGPIO is not set
1617# CONFIG_HTC_PASIC3 is not set
1618# CONFIG_HTC_I2CPLD is not set
1619# CONFIG_MFD_LM3533 is not set
1620# CONFIG_TPS6105X is not set
1621# CONFIG_TPS65010 is not set
1622# CONFIG_TPS6507X is not set
1623# CONFIG_MFD_TPS65217 is not set
1624# CONFIG_MFD_TPS6586X is not set
1625# CONFIG_MFD_TPS65910 is not set
1626# CONFIG_MFD_TPS65912_I2C is not set
1627# CONFIG_MFD_TPS65912_SPI is not set
1628# CONFIG_MFD_TPS80031 is not set
1629# CONFIG_TWL4030_CORE is not set
1630# CONFIG_TWL6040_CORE is not set
1631# CONFIG_MFD_STMPE is not set
1632# CONFIG_MFD_TC3589X is not set
1633# CONFIG_MFD_TMIO is not set
1634# CONFIG_MFD_T7L66XB is not set
1635# CONFIG_MFD_SMSC is not set
1636# CONFIG_MFD_TC6387XB is not set
1637# CONFIG_MFD_TC6393XB is not set
1638# CONFIG_PMIC_DA903X is not set
1639# CONFIG_MFD_DA9052_SPI is not set
1640# CONFIG_MFD_DA9052_I2C is not set
1641# CONFIG_MFD_DA9055 is not set
1642# CONFIG_PMIC_ADP5520 is not set
1643# CONFIG_MFD_LP8788 is not set
1644# CONFIG_MFD_MAX77686 is not set
1645# CONFIG_MFD_MAX77693 is not set
1646# CONFIG_MFD_MAX8907 is not set
1647# CONFIG_MFD_MAX8925 is not set
1648# CONFIG_MFD_MAX8997 is not set
1649# CONFIG_MFD_MAX8998 is not set
1650# CONFIG_MFD_SEC_CORE is not set
1651# CONFIG_MFD_ARIZONA_I2C is not set
1652# CONFIG_MFD_ARIZONA_SPI is not set
1653# CONFIG_MFD_WM8400 is not set
1654# CONFIG_MFD_WM831X_I2C is not set
1655# CONFIG_MFD_WM831X_SPI is not set
1656# CONFIG_MFD_WM8350_I2C is not set
1657# CONFIG_MFD_WM8994 is not set
1658# CONFIG_MFD_PCF50633 is not set
1659# CONFIG_MFD_MC13XXX_SPI is not set
1660# CONFIG_MFD_MC13XXX_I2C is not set
1661# CONFIG_ABX500_CORE is not set
1662# CONFIG_EZX_PCAP is not set
1663# CONFIG_MFD_TIMBERDALE is not set
1664# CONFIG_LPC_SCH is not set
1665# CONFIG_LPC_ICH is not set
1666# CONFIG_MFD_RDC321X is not set
1667# CONFIG_MFD_JANZ_CMODIO is not set
1668# CONFIG_MFD_VX855 is not set
1669# CONFIG_MFD_WL1273_CORE is not set
1670# CONFIG_MFD_TPS65090 is not set
1671# CONFIG_MFD_AAT2870_CORE is not set
1672# CONFIG_MFD_RC5T583 is not set
1673# CONFIG_MFD_SYSCON is not set
1674# CONFIG_MFD_PALMAS is not set
1675# CONFIG_MFD_VIPERBOARD is not set
1676# CONFIG_MFD_RETU is not set
1677# CONFIG_MFD_AS3711 is not set
1678CONFIG_VEXPRESS_CONFIG=y
1679# CONFIG_REGULATOR is not set
1680CONFIG_MEDIA_SUPPORT=y
1681
1682#
1683# Multimedia core support
1684#
1685# CONFIG_MEDIA_CAMERA_SUPPORT is not set
1686# CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
1687# CONFIG_MEDIA_DIGITAL_TV_SUPPORT is not set
1688# CONFIG_MEDIA_RADIO_SUPPORT is not set
1689# CONFIG_MEDIA_RC_SUPPORT is not set
1690# CONFIG_VIDEO_ADV_DEBUG is not set
1691# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
1692
1693#
1694# Media drivers
1695#
1696# CONFIG_MEDIA_USB_SUPPORT is not set
1697# CONFIG_MEDIA_PCI_SUPPORT is not set
1698
1699#
1700# Supported MMC/SDIO adapters
1701#
1702
1703#
1704# Media ancillary drivers (tuners, sensors, i2c, frontends)
1705#
1706
1707#
1708# Customise DVB Frontends
1709#
1710# CONFIG_DVB_TUNER_DIB0070 is not set
1711# CONFIG_DVB_TUNER_DIB0090 is not set
1712
1713#
1714# Tools to develop new frontends
1715#
1716# CONFIG_DVB_DUMMY_FE is not set
1717
1718#
1719# Graphics support
1720#
1721CONFIG_VGA_ARB=y
1722CONFIG_VGA_ARB_MAX_GPUS=16
1723# CONFIG_DRM is not set
1724# CONFIG_STUB_POULSBO is not set
1725# CONFIG_VGASTATE is not set
1726# CONFIG_VIDEO_OUTPUT_CONTROL is not set
1727CONFIG_FB=y
1728# CONFIG_FIRMWARE_EDID is not set
1729# CONFIG_FB_DDC is not set
1730# CONFIG_FB_BOOT_VESA_SUPPORT is not set
1731CONFIG_FB_CFB_FILLRECT=y
1732CONFIG_FB_CFB_COPYAREA=y
1733CONFIG_FB_CFB_IMAGEBLIT=y
1734# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
1735# CONFIG_FB_SYS_FILLRECT is not set
1736# CONFIG_FB_SYS_COPYAREA is not set
1737# CONFIG_FB_SYS_IMAGEBLIT is not set
1738# CONFIG_FB_FOREIGN_ENDIAN is not set
1739# CONFIG_FB_SYS_FOPS is not set
1740# CONFIG_FB_WMT_GE_ROPS is not set
1741# CONFIG_FB_SVGALIB is not set
1742# CONFIG_FB_MACMODES is not set
1743# CONFIG_FB_BACKLIGHT is not set
1744# CONFIG_FB_MODE_HELPERS is not set
1745# CONFIG_FB_TILEBLITTING is not set
1746
1747#
1748# Frame buffer hardware drivers
1749#
1750# CONFIG_FB_CIRRUS is not set
1751# CONFIG_FB_PM2 is not set
1752# CONFIG_FB_ARMCLCD is not set
1753# CONFIG_FB_CYBER2000 is not set
1754# CONFIG_FB_ASILIANT is not set
1755# CONFIG_FB_IMSTT is not set
1756# CONFIG_FB_UVESA is not set
1757# CONFIG_FB_S1D13XXX is not set
1758# CONFIG_FB_NVIDIA is not set
1759# CONFIG_FB_RIVA is not set
1760# CONFIG_FB_I740 is not set
1761# CONFIG_FB_MATROX is not set
1762# CONFIG_FB_RADEON is not set
1763# CONFIG_FB_ATY128 is not set
1764# CONFIG_FB_ATY is not set
1765# CONFIG_FB_S3 is not set
1766# CONFIG_FB_SAVAGE is not set
1767# CONFIG_FB_SIS is not set
1768# CONFIG_FB_NEOMAGIC is not set
1769# CONFIG_FB_KYRO is not set
1770# CONFIG_FB_3DFX is not set
1771# CONFIG_FB_VOODOO1 is not set
1772# CONFIG_FB_VT8623 is not set
1773# CONFIG_FB_TRIDENT is not set
1774# CONFIG_FB_ARK is not set
1775# CONFIG_FB_PM3 is not set
1776# CONFIG_FB_CARMINE is not set
1777# CONFIG_FB_SMSCUFX is not set
1778# CONFIG_FB_UDL is not set
1779# CONFIG_FB_XILINX is not set
1780# CONFIG_FB_VIRTUAL is not set
1781# CONFIG_FB_METRONOME is not set
1782# CONFIG_FB_MB862XX is not set
1783# CONFIG_FB_BROADSHEET is not set
1784# CONFIG_FB_AUO_K190X is not set
1785CONFIG_FB_XYLON=y
1786# CONFIG_FB_XYLON_PLATFORM is not set
1787CONFIG_FB_XYLON_OF=y
1788# CONFIG_FB_XYLON_PIXCLK is not set
1789# CONFIG_FB_XYLON_MISC is not set
1790# CONFIG_EXYNOS_VIDEO is not set
1791# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
1792
1793#
1794# Console display driver support
1795#
1796CONFIG_DUMMY_CONSOLE=y
1797CONFIG_FRAMEBUFFER_CONSOLE=y
1798# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
1799# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
1800CONFIG_FONTS=y
1801CONFIG_FONT_8x8=y
1802CONFIG_FONT_8x16=y
1803# CONFIG_FONT_6x11 is not set
1804# CONFIG_FONT_7x14 is not set
1805# CONFIG_FONT_PEARL_8x8 is not set
1806# CONFIG_FONT_ACORN_8x8 is not set
1807# CONFIG_FONT_MINI_4x6 is not set
1808# CONFIG_FONT_SUN8x16 is not set
1809# CONFIG_FONT_SUN12x22 is not set
1810# CONFIG_FONT_10x18 is not set
1811# CONFIG_LOGO is not set
1812# CONFIG_FB_SSD1307 is not set
1813# CONFIG_SOUND is not set
1814
1815#
1816# HID support
1817#
1818CONFIG_HID=y
1819# CONFIG_HIDRAW is not set
1820# CONFIG_UHID is not set
1821CONFIG_HID_GENERIC=y
1822
1823#
1824# Special HID drivers
1825#
1826# CONFIG_HID_A4TECH is not set
1827# CONFIG_HID_ACRUX is not set
1828# CONFIG_HID_APPLE is not set
1829# CONFIG_HID_AUREAL is not set
1830# CONFIG_HID_BELKIN is not set
1831# CONFIG_HID_CHERRY is not set
1832# CONFIG_HID_CHICONY is not set
1833# CONFIG_HID_CYPRESS is not set
1834# CONFIG_HID_DRAGONRISE is not set
1835# CONFIG_HID_EMS_FF is not set
1836# CONFIG_HID_EZKEY is not set
1837# CONFIG_HID_HOLTEK is not set
1838# CONFIG_HID_KEYTOUCH is not set
1839# CONFIG_HID_KYE is not set
1840# CONFIG_HID_UCLOGIC is not set
1841# CONFIG_HID_WALTOP is not set
1842# CONFIG_HID_GYRATION is not set
1843# CONFIG_HID_TWINHAN is not set
1844# CONFIG_HID_KENSINGTON is not set
1845# CONFIG_HID_LCPOWER is not set
1846# CONFIG_HID_LENOVO_TPKBD is not set
1847# CONFIG_HID_LOGITECH is not set
1848CONFIG_HID_MICROSOFT=y
1849# CONFIG_HID_MONTEREY is not set
1850# CONFIG_HID_MULTITOUCH is not set
1851# CONFIG_HID_NTRIG is not set
1852# CONFIG_HID_ORTEK is not set
1853# CONFIG_HID_PANTHERLORD is not set
1854# CONFIG_HID_PETALYNX is not set
1855# CONFIG_HID_PICOLCD is not set
1856# CONFIG_HID_PRIMAX is not set
1857# CONFIG_HID_ROCCAT is not set
1858# CONFIG_HID_SAITEK is not set
1859# CONFIG_HID_SAMSUNG is not set
1860# CONFIG_HID_SONY is not set
1861# CONFIG_HID_SPEEDLINK is not set
1862# CONFIG_HID_SUNPLUS is not set
1863# CONFIG_HID_GREENASIA is not set
1864# CONFIG_HID_SMARTJOYPLUS is not set
1865# CONFIG_HID_TIVO is not set
1866# CONFIG_HID_TOPSEED is not set
1867# CONFIG_HID_THRUSTMASTER is not set
1868# CONFIG_HID_ZEROPLUS is not set
1869# CONFIG_HID_ZYDACRON is not set
1870# CONFIG_HID_SENSOR_HUB is not set
1871
1872#
1873# USB HID support
1874#
1875CONFIG_USB_HID=y
1876# CONFIG_HID_PID is not set
1877# CONFIG_USB_HIDDEV is not set
1878
1879#
1880# I2C HID support
1881#
1882# CONFIG_I2C_HID is not set
1883CONFIG_USB_ARCH_HAS_OHCI=y
1884CONFIG_USB_ARCH_HAS_EHCI=y
1885CONFIG_USB_ARCH_HAS_XHCI=y
1886CONFIG_USB_SUPPORT=y
1887CONFIG_USB_COMMON=y
1888CONFIG_USB_ARCH_HAS_HCD=y
1889CONFIG_USB=y
1890# CONFIG_USB_DEBUG is not set
1891# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set
1892
1893#
1894# Miscellaneous USB options
1895#
1896# CONFIG_USB_DYNAMIC_MINORS is not set
1897CONFIG_USB_SUSPEND=y
1898CONFIG_USB_OTG=y
1899# CONFIG_USB_OTG_WHITELIST is not set
1900# CONFIG_USB_OTG_BLACKLIST_HUB is not set
1901# CONFIG_USB_DWC3 is not set
1902# CONFIG_USB_MON is not set
1903# CONFIG_USB_WUSB_CBAF is not set
1904
1905#
1906# USB Host Controller Drivers
1907#
1908# CONFIG_USB_C67X00_HCD is not set
1909# CONFIG_USB_XHCI_HCD is not set
1910CONFIG_USB_EHCI_HCD=y
1911CONFIG_USB_EHCI_ROOT_HUB_TT=y
1912# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1913CONFIG_USB_EHCI_PCI=y
1914CONFIG_USB_XUSBPS_DR_OF=y
1915CONFIG_USB_EHCI_XUSBPS=y
1916# CONFIG_USB_OXU210HP_HCD is not set
1917# CONFIG_USB_ISP116X_HCD is not set
1918# CONFIG_USB_ISP1760_HCD is not set
1919# CONFIG_USB_ISP1362_HCD is not set
1920# CONFIG_USB_OHCI_HCD is not set
1921# CONFIG_USB_EHCI_HCD_PLATFORM is not set
1922# CONFIG_USB_UHCI_HCD is not set
1923# CONFIG_USB_SL811_HCD is not set
1924# CONFIG_USB_R8A66597_HCD is not set
1925# CONFIG_USB_MUSB_HDRC is not set
1926# CONFIG_USB_CHIPIDEA is not set
1927# CONFIG_USB_RENESAS_USBHS is not set
1928
1929#
1930# USB Device Class drivers
1931#
1932# CONFIG_USB_ACM is not set
1933# CONFIG_USB_PRINTER is not set
1934# CONFIG_USB_WDM is not set
1935# CONFIG_USB_TMC is not set
1936
1937#
1938# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
1939#
1940
1941#
1942# also be needed; see USB_STORAGE Help for more info
1943#
1944CONFIG_USB_STORAGE=y
1945# CONFIG_USB_STORAGE_DEBUG is not set
1946# CONFIG_USB_STORAGE_REALTEK is not set
1947# CONFIG_USB_STORAGE_DATAFAB is not set
1948# CONFIG_USB_STORAGE_FREECOM is not set
1949# CONFIG_USB_STORAGE_ISD200 is not set
1950# CONFIG_USB_STORAGE_USBAT is not set
1951# CONFIG_USB_STORAGE_SDDR09 is not set
1952# CONFIG_USB_STORAGE_SDDR55 is not set
1953# CONFIG_USB_STORAGE_JUMPSHOT is not set
1954# CONFIG_USB_STORAGE_ALAUDA is not set
1955# CONFIG_USB_STORAGE_ONETOUCH is not set
1956# CONFIG_USB_STORAGE_KARMA is not set
1957# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
1958# CONFIG_USB_STORAGE_ENE_UB6250 is not set
1959
1960#
1961# USB Imaging devices
1962#
1963# CONFIG_USB_MDC800 is not set
1964# CONFIG_USB_MICROTEK is not set
1965
1966#
1967# USB port drivers
1968#
1969# CONFIG_USB_SERIAL is not set
1970
1971#
1972# USB Miscellaneous drivers
1973#
1974# CONFIG_USB_EMI62 is not set
1975# CONFIG_USB_EMI26 is not set
1976# CONFIG_USB_ADUTUX is not set
1977# CONFIG_USB_SEVSEG is not set
1978# CONFIG_USB_RIO500 is not set
1979# CONFIG_USB_LEGOTOWER is not set
1980# CONFIG_USB_LCD is not set
1981# CONFIG_USB_LED is not set
1982# CONFIG_USB_CYPRESS_CY7C63 is not set
1983# CONFIG_USB_CYTHERM is not set
1984# CONFIG_USB_IDMOUSE is not set
1985# CONFIG_USB_FTDI_ELAN is not set
1986# CONFIG_USB_APPLEDISPLAY is not set
1987# CONFIG_USB_SISUSBVGA is not set
1988# CONFIG_USB_LD is not set
1989# CONFIG_USB_TRANCEVIBRATOR is not set
1990# CONFIG_USB_IOWARRIOR is not set
1991# CONFIG_USB_TEST is not set
1992# CONFIG_USB_ISIGHTFW is not set
1993# CONFIG_USB_YUREX is not set
1994# CONFIG_USB_EZUSB_FX2 is not set
1995
1996#
1997# USB Physical Layer drivers
1998#
1999# CONFIG_USB_ISP1301 is not set
2000# CONFIG_USB_RCAR_PHY is not set
2001CONFIG_USB_GADGET=y
2002# CONFIG_USB_GADGET_DEBUG is not set
2003# CONFIG_USB_GADGET_DEBUG_FILES is not set
2004# CONFIG_USB_GADGET_DEBUG_FS is not set
2005CONFIG_USB_GADGET_VBUS_DRAW=2
2006CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2
2007
2008#
2009# USB Peripheral Controller
2010#
2011CONFIG_USB_GADGET_XUSBPS=y
2012CONFIG_XUSBPS_ERRATA_DT654401=y
2013CONFIG_USB_XUSBPS=y
2014# CONFIG_USB_FUSB300 is not set
2015# CONFIG_USB_R8A66597 is not set
2016# CONFIG_USB_GADGET_XILINX is not set
2017# CONFIG_USB_MV_UDC is not set
2018# CONFIG_USB_M66592 is not set
2019# CONFIG_USB_AMD5536UDC is not set
2020# CONFIG_USB_NET2272 is not set
2021# CONFIG_USB_NET2280 is not set
2022# CONFIG_USB_GOKU is not set
2023# CONFIG_USB_EG20T is not set
2024# CONFIG_USB_DUMMY_HCD is not set
2025CONFIG_USB_LIBCOMPOSITE=m
2026CONFIG_USB_ZERO=m
2027# CONFIG_USB_ZERO_HNPTEST is not set
2028# CONFIG_USB_ETH is not set
2029# CONFIG_USB_G_NCM is not set
2030# CONFIG_USB_GADGETFS is not set
2031# CONFIG_USB_FUNCTIONFS is not set
2032# CONFIG_USB_MASS_STORAGE is not set
2033# CONFIG_USB_G_SERIAL is not set
2034# CONFIG_USB_G_PRINTER is not set
2035# CONFIG_USB_CDC_COMPOSITE is not set
2036# CONFIG_USB_G_ACM_MS is not set
2037# CONFIG_USB_G_MULTI is not set
2038# CONFIG_USB_G_HID is not set
2039# CONFIG_USB_G_DBGP is not set
2040
2041#
2042# OTG and related infrastructure
2043#
2044CONFIG_USB_OTG_UTILS=y
2045# CONFIG_USB_GPIO_VBUS is not set
2046CONFIG_USB_ULPI=y
2047CONFIG_USB_ULPI_VIEWPORT=y
2048# CONFIG_NOP_USB_XCEIV is not set
2049CONFIG_USB_XUSBPS_OTG=y
2050# CONFIG_UWB is not set
2051CONFIG_MMC=y
2052# CONFIG_MMC_DEBUG is not set
2053# CONFIG_MMC_UNSAFE_RESUME is not set
2054# CONFIG_MMC_CLKGATE is not set
2055
2056#
2057# MMC/SD/SDIO Card Drivers
2058#
2059CONFIG_MMC_BLOCK=y
2060CONFIG_MMC_BLOCK_MINORS=8
2061CONFIG_MMC_BLOCK_BOUNCE=y
2062# CONFIG_SDIO_UART is not set
2063# CONFIG_MMC_TEST is not set
2064
2065#
2066# MMC/SD/SDIO Host Controller Drivers
2067#
2068# CONFIG_MMC_ARMMMCI is not set
2069CONFIG_MMC_SDHCI=y
2070# CONFIG_MMC_SDHCI_PCI is not set
2071CONFIG_MMC_SDHCI_PLTFM=y
2072CONFIG_MMC_SDHCI_OF_XILINX_PS=y
2073# CONFIG_MMC_SDHCI_PXAV3 is not set
2074# CONFIG_MMC_SDHCI_PXAV2 is not set
2075# CONFIG_MMC_TIFM_SD is not set
2076# CONFIG_MMC_CB710 is not set
2077# CONFIG_MMC_VIA_SDMMC is not set
2078# CONFIG_MMC_DW is not set
2079# CONFIG_MMC_VUB300 is not set
2080# CONFIG_MMC_USHC is not set
2081# CONFIG_MEMSTICK is not set
2082# CONFIG_NEW_LEDS is not set
2083# CONFIG_ACCESSIBILITY is not set
2084# CONFIG_INFINIBAND is not set
2085# CONFIG_EDAC is not set
2086CONFIG_RTC_LIB=y
2087CONFIG_RTC_CLASS=y
2088CONFIG_RTC_HCTOSYS=y
2089CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
2090# CONFIG_RTC_DEBUG is not set
2091
2092#
2093# RTC interfaces
2094#
2095CONFIG_RTC_INTF_SYSFS=y
2096CONFIG_RTC_INTF_PROC=y
2097CONFIG_RTC_INTF_DEV=y
2098# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
2099# CONFIG_RTC_DRV_TEST is not set
2100
2101#
2102# I2C RTC drivers
2103#
2104# CONFIG_RTC_DRV_DS1307 is not set
2105# CONFIG_RTC_DRV_DS1374 is not set
2106# CONFIG_RTC_DRV_DS1672 is not set
2107# CONFIG_RTC_DRV_DS3232 is not set
2108# CONFIG_RTC_DRV_MAX6900 is not set
2109# CONFIG_RTC_DRV_RS5C372 is not set
2110# CONFIG_RTC_DRV_ISL1208 is not set
2111# CONFIG_RTC_DRV_ISL12022 is not set
2112# CONFIG_RTC_DRV_X1205 is not set
2113# CONFIG_RTC_DRV_PCF8523 is not set
2114CONFIG_RTC_DRV_PCF8563=y
2115# CONFIG_RTC_DRV_PCF8583 is not set
2116# CONFIG_RTC_DRV_M41T80 is not set
2117# CONFIG_RTC_DRV_BQ32K is not set
2118# CONFIG_RTC_DRV_S35390A is not set
2119# CONFIG_RTC_DRV_FM3130 is not set
2120# CONFIG_RTC_DRV_RX8581 is not set
2121# CONFIG_RTC_DRV_RX8025 is not set
2122# CONFIG_RTC_DRV_EM3027 is not set
2123# CONFIG_RTC_DRV_RV3029C2 is not set
2124
2125#
2126# SPI RTC drivers
2127#
2128# CONFIG_RTC_DRV_M41T93 is not set
2129# CONFIG_RTC_DRV_M41T94 is not set
2130# CONFIG_RTC_DRV_DS1305 is not set
2131# CONFIG_RTC_DRV_DS1390 is not set
2132# CONFIG_RTC_DRV_MAX6902 is not set
2133# CONFIG_RTC_DRV_R9701 is not set
2134# CONFIG_RTC_DRV_RS5C348 is not set
2135# CONFIG_RTC_DRV_DS3234 is not set
2136# CONFIG_RTC_DRV_PCF2123 is not set
2137
2138#
2139# Platform RTC drivers
2140#
2141# CONFIG_RTC_DRV_CMOS is not set
2142# CONFIG_RTC_DRV_DS1286 is not set
2143# CONFIG_RTC_DRV_DS1511 is not set
2144# CONFIG_RTC_DRV_DS1553 is not set
2145# CONFIG_RTC_DRV_DS1742 is not set
2146# CONFIG_RTC_DRV_STK17TA8 is not set
2147# CONFIG_RTC_DRV_M48T86 is not set
2148# CONFIG_RTC_DRV_M48T35 is not set
2149# CONFIG_RTC_DRV_M48T59 is not set
2150# CONFIG_RTC_DRV_MSM6242 is not set
2151# CONFIG_RTC_DRV_BQ4802 is not set
2152# CONFIG_RTC_DRV_RP5C01 is not set
2153# CONFIG_RTC_DRV_V3020 is not set
2154# CONFIG_RTC_DRV_DS2404 is not set
2155
2156#
2157# on-CPU RTC drivers
2158#
2159# CONFIG_RTC_DRV_PL030 is not set
2160# CONFIG_RTC_DRV_PL031 is not set
2161# CONFIG_RTC_DRV_SNVS is not set
2162CONFIG_DMADEVICES=y
2163# CONFIG_DMADEVICES_DEBUG is not set
2164
2165#
2166# DMA Devices
2167#
2168CONFIG_XILINX_DMA_ENGINES=y
2169CONFIG_XILINX_AXIDMA=y
2170# CONFIG_XILINX_DMATEST is not set
2171CONFIG_XILINX_AXIVDMA=y
2172# CONFIG_XILINX_VDMATEST is not set
2173CONFIG_XILINX_AXICDMA=y
2174# CONFIG_XILINX_CDMATEST is not set
2175# CONFIG_AMBA_PL08X is not set
2176# CONFIG_DW_DMAC is not set
2177# CONFIG_TIMB_DMA is not set
2178CONFIG_PL330_DMA=y
2179
2180#
2181# DMA Clients
2182#
2183# CONFIG_NET_DMA is not set
2184# CONFIG_ASYNC_TX_DMA is not set
2185# CONFIG_DMATEST is not set
2186# CONFIG_AUXDISPLAY is not set
2187CONFIG_UIO=y
2188# CONFIG_UIO_CIF is not set
2189CONFIG_UIO_PDRV_GENIRQ=y
2190# CONFIG_UIO_DMEM_GENIRQ is not set
2191# CONFIG_UIO_AEC is not set
2192# CONFIG_UIO_SERCOS3 is not set
2193# CONFIG_UIO_PCI_GENERIC is not set
2194# CONFIG_UIO_NETX is not set
2195CONFIG_VIRTIO=m
2196
2197#
2198# Virtio drivers
2199#
2200# CONFIG_VIRTIO_PCI is not set
2201# CONFIG_VIRTIO_BALLOON is not set
2202# CONFIG_VIRTIO_MMIO is not set
2203
2204#
2205# Microsoft Hyper-V guest support
2206#
2207# CONFIG_STAGING is not set
2208CONFIG_CLKDEV_LOOKUP=y
2209CONFIG_HAVE_CLK_PREPARE=y
2210CONFIG_COMMON_CLK=y
2211
2212#
2213# Common Clock Framework
2214#
2215CONFIG_COMMON_CLK_DEBUG=y
2216CONFIG_COMMON_CLK_VERSATILE=y
2217
2218#
2219# Hardware Spinlock drivers
2220#
2221CONFIG_CLKSRC_MMIO=y
2222CONFIG_IOMMU_SUPPORT=y
2223CONFIG_OF_IOMMU=y
2224
2225#
2226# Remoteproc drivers (EXPERIMENTAL)
2227#
2228CONFIG_REMOTEPROC=m
2229# CONFIG_STE_MODEM_RPROC is not set
2230CONFIG_ZYNQ_REMOTEPROC=m
2231CONFIG_MB_REMOTEPROC=m
2232
2233#
2234# Rpmsg drivers (EXPERIMENTAL)
2235#
2236CONFIG_RPMSG=m
2237# CONFIG_RPMSG_SERVER_SAMPLE is not set
2238# CONFIG_RPMSG_OMX is not set
2239# CONFIG_RPMSG_FREERTOS_STAT is not set
2240# CONFIG_VIRT_DRIVERS is not set
2241# CONFIG_PM_DEVFREQ is not set
2242# CONFIG_EXTCON is not set
2243CONFIG_MEMORY=y
2244CONFIG_ZYNQ_SMC=y
2245# CONFIG_IIO is not set
2246# CONFIG_VME_BUS is not set
2247# CONFIG_PWM is not set
2248# CONFIG_PMODS is not set
2249# CONFIG_IPACK_BUS is not set
2250
2251#
2252# File systems
2253#
2254CONFIG_DCACHE_WORD_ACCESS=y
2255CONFIG_EXT2_FS=y
2256# CONFIG_EXT2_FS_XATTR is not set
2257# CONFIG_EXT2_FS_XIP is not set
2258CONFIG_EXT3_FS=y
2259# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
2260CONFIG_EXT3_FS_XATTR=y
2261# CONFIG_EXT3_FS_POSIX_ACL is not set
2262# CONFIG_EXT3_FS_SECURITY is not set
2263CONFIG_EXT4_FS=y
2264# CONFIG_EXT4_FS_POSIX_ACL is not set
2265# CONFIG_EXT4_FS_SECURITY is not set
2266# CONFIG_EXT4_DEBUG is not set
2267CONFIG_JBD=y
2268# CONFIG_JBD_DEBUG is not set
2269CONFIG_JBD2=y
2270# CONFIG_JBD2_DEBUG is not set
2271CONFIG_FS_MBCACHE=y
2272# CONFIG_REISERFS_FS is not set
2273# CONFIG_JFS_FS is not set
2274# CONFIG_XFS_FS is not set
2275# CONFIG_GFS2_FS is not set
2276# CONFIG_BTRFS_FS is not set
2277# CONFIG_NILFS2_FS is not set
2278# CONFIG_FS_POSIX_ACL is not set
2279CONFIG_FILE_LOCKING=y
2280CONFIG_FSNOTIFY=y
2281# CONFIG_DNOTIFY is not set
2282CONFIG_INOTIFY_USER=y
2283# CONFIG_FANOTIFY is not set
2284# CONFIG_QUOTA is not set
2285# CONFIG_QUOTACTL is not set
2286# CONFIG_AUTOFS4_FS is not set
2287# CONFIG_FUSE_FS is not set
2288
2289#
2290# Caches
2291#
2292# CONFIG_FSCACHE is not set
2293
2294#
2295# CD-ROM/DVD Filesystems
2296#
2297# CONFIG_ISO9660_FS is not set
2298# CONFIG_UDF_FS is not set
2299
2300#
2301# DOS/FAT/NT Filesystems
2302#
2303CONFIG_FAT_FS=y
2304CONFIG_MSDOS_FS=y
2305CONFIG_VFAT_FS=y
2306CONFIG_FAT_DEFAULT_CODEPAGE=437
2307CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
2308# CONFIG_NTFS_FS is not set
2309
2310#
2311# Pseudo filesystems
2312#
2313CONFIG_PROC_FS=y
2314CONFIG_PROC_SYSCTL=y
2315CONFIG_PROC_PAGE_MONITOR=y
2316CONFIG_SYSFS=y
2317CONFIG_TMPFS=y
2318# CONFIG_TMPFS_POSIX_ACL is not set
2319# CONFIG_TMPFS_XATTR is not set
2320# CONFIG_HUGETLB_PAGE is not set
2321# CONFIG_CONFIGFS_FS is not set
2322CONFIG_MISC_FILESYSTEMS=y
2323# CONFIG_ADFS_FS is not set
2324# CONFIG_AFFS_FS is not set
2325# CONFIG_HFS_FS is not set
2326# CONFIG_HFSPLUS_FS is not set
2327# CONFIG_BEFS_FS is not set
2328# CONFIG_BFS_FS is not set
2329# CONFIG_EFS_FS is not set
2330CONFIG_JFFS2_FS=y
2331CONFIG_JFFS2_FS_DEBUG=0
2332CONFIG_JFFS2_FS_WRITEBUFFER=y
2333# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
2334CONFIG_JFFS2_SUMMARY=y
2335# CONFIG_JFFS2_FS_XATTR is not set
2336# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
2337CONFIG_JFFS2_ZLIB=y
2338# CONFIG_JFFS2_LZO is not set
2339CONFIG_JFFS2_RTIME=y
2340# CONFIG_JFFS2_RUBIN is not set
2341# CONFIG_LOGFS is not set
2342# CONFIG_CRAMFS is not set
2343# CONFIG_SQUASHFS is not set
2344# CONFIG_VXFS_FS is not set
2345# CONFIG_MINIX_FS is not set
2346# CONFIG_OMFS_FS is not set
2347# CONFIG_HPFS_FS is not set
2348# CONFIG_QNX4FS_FS is not set
2349# CONFIG_QNX6FS_FS is not set
2350# CONFIG_ROMFS_FS is not set
2351# CONFIG_PSTORE is not set
2352# CONFIG_SYSV_FS is not set
2353# CONFIG_UFS_FS is not set
2354# CONFIG_F2FS_FS is not set
2355CONFIG_NETWORK_FILESYSTEMS=y
2356CONFIG_NFS_FS=y
2357CONFIG_NFS_V2=y
2358CONFIG_NFS_V3=y
2359# CONFIG_NFS_V3_ACL is not set
2360# CONFIG_NFS_V4 is not set
2361# CONFIG_NFS_SWAP is not set
2362CONFIG_ROOT_NFS=y
2363# CONFIG_NFSD is not set
2364CONFIG_LOCKD=y
2365CONFIG_LOCKD_V4=y
2366CONFIG_NFS_COMMON=y
2367CONFIG_SUNRPC=y
2368# CONFIG_SUNRPC_DEBUG is not set
2369# CONFIG_CEPH_FS is not set
2370# CONFIG_CIFS is not set
2371# CONFIG_NCP_FS is not set
2372# CONFIG_CODA_FS is not set
2373# CONFIG_AFS_FS is not set
2374CONFIG_NLS=y
2375CONFIG_NLS_DEFAULT="iso8859-1"
2376CONFIG_NLS_CODEPAGE_437=y
2377# CONFIG_NLS_CODEPAGE_737 is not set
2378# CONFIG_NLS_CODEPAGE_775 is not set
2379# CONFIG_NLS_CODEPAGE_850 is not set
2380# CONFIG_NLS_CODEPAGE_852 is not set
2381# CONFIG_NLS_CODEPAGE_855 is not set
2382# CONFIG_NLS_CODEPAGE_857 is not set
2383# CONFIG_NLS_CODEPAGE_860 is not set
2384# CONFIG_NLS_CODEPAGE_861 is not set
2385# CONFIG_NLS_CODEPAGE_862 is not set
2386# CONFIG_NLS_CODEPAGE_863 is not set
2387# CONFIG_NLS_CODEPAGE_864 is not set
2388# CONFIG_NLS_CODEPAGE_865 is not set
2389# CONFIG_NLS_CODEPAGE_866 is not set
2390# CONFIG_NLS_CODEPAGE_869 is not set
2391# CONFIG_NLS_CODEPAGE_936 is not set
2392# CONFIG_NLS_CODEPAGE_950 is not set
2393# CONFIG_NLS_CODEPAGE_932 is not set
2394# CONFIG_NLS_CODEPAGE_949 is not set
2395# CONFIG_NLS_CODEPAGE_874 is not set
2396# CONFIG_NLS_ISO8859_8 is not set
2397# CONFIG_NLS_CODEPAGE_1250 is not set
2398# CONFIG_NLS_CODEPAGE_1251 is not set
2399CONFIG_NLS_ASCII=y
2400CONFIG_NLS_ISO8859_1=y
2401# CONFIG_NLS_ISO8859_2 is not set
2402# CONFIG_NLS_ISO8859_3 is not set
2403# CONFIG_NLS_ISO8859_4 is not set
2404# CONFIG_NLS_ISO8859_5 is not set
2405# CONFIG_NLS_ISO8859_6 is not set
2406# CONFIG_NLS_ISO8859_7 is not set
2407# CONFIG_NLS_ISO8859_9 is not set
2408# CONFIG_NLS_ISO8859_13 is not set
2409# CONFIG_NLS_ISO8859_14 is not set
2410# CONFIG_NLS_ISO8859_15 is not set
2411# CONFIG_NLS_KOI8_R is not set
2412# CONFIG_NLS_KOI8_U is not set
2413# CONFIG_NLS_MAC_ROMAN is not set
2414# CONFIG_NLS_MAC_CELTIC is not set
2415# CONFIG_NLS_MAC_CENTEURO is not set
2416# CONFIG_NLS_MAC_CROATIAN is not set
2417# CONFIG_NLS_MAC_CYRILLIC is not set
2418# CONFIG_NLS_MAC_GAELIC is not set
2419# CONFIG_NLS_MAC_GREEK is not set
2420# CONFIG_NLS_MAC_ICELAND is not set
2421# CONFIG_NLS_MAC_INUIT is not set
2422# CONFIG_NLS_MAC_ROMANIAN is not set
2423# CONFIG_NLS_MAC_TURKISH is not set
2424# CONFIG_NLS_UTF8 is not set
2425
2426#
2427# Kernel hacking
2428#
2429# CONFIG_PRINTK_TIME is not set
2430CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4
2431# CONFIG_ENABLE_WARN_DEPRECATED is not set
2432# CONFIG_ENABLE_MUST_CHECK is not set
2433CONFIG_FRAME_WARN=1024
2434# CONFIG_MAGIC_SYSRQ is not set
2435# CONFIG_STRIP_ASM_SYMS is not set
2436# CONFIG_READABLE_ASM is not set
2437# CONFIG_UNUSED_SYMBOLS is not set
2438CONFIG_DEBUG_FS=y
2439# CONFIG_HEADERS_CHECK is not set
2440# CONFIG_DEBUG_SECTION_MISMATCH is not set
2441CONFIG_DEBUG_KERNEL=y
2442# CONFIG_LOCKUP_DETECTOR is not set
2443# CONFIG_PANIC_ON_OOPS is not set
2444CONFIG_PANIC_ON_OOPS_VALUE=0
2445# CONFIG_DETECT_HUNG_TASK is not set
2446# CONFIG_SCHED_DEBUG is not set
2447# CONFIG_SCHEDSTATS is not set
2448CONFIG_TIMER_STATS=y
2449# CONFIG_DEBUG_OBJECTS is not set
2450# CONFIG_DEBUG_SLAB is not set
2451CONFIG_HAVE_DEBUG_KMEMLEAK=y
2452# CONFIG_DEBUG_KMEMLEAK is not set
2453CONFIG_DEBUG_PREEMPT=y
2454# CONFIG_DEBUG_RT_MUTEXES is not set
2455# CONFIG_RT_MUTEX_TESTER is not set
2456# CONFIG_DEBUG_SPINLOCK is not set
2457# CONFIG_DEBUG_MUTEXES is not set
2458# CONFIG_DEBUG_LOCK_ALLOC is not set
2459# CONFIG_PROVE_LOCKING is not set
2460# CONFIG_PROVE_RCU_DELAY is not set
2461# CONFIG_SPARSE_RCU_POINTER is not set
2462# CONFIG_LOCK_STAT is not set
2463# CONFIG_DEBUG_ATOMIC_SLEEP is not set
2464# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
2465# CONFIG_DEBUG_STACK_USAGE is not set
2466# CONFIG_DEBUG_KOBJECT is not set
2467# CONFIG_DEBUG_HIGHMEM is not set
2468# CONFIG_DEBUG_BUGVERBOSE is not set
2469# CONFIG_DEBUG_INFO is not set
2470# CONFIG_DEBUG_VM is not set
2471# CONFIG_DEBUG_WRITECOUNT is not set
2472# CONFIG_DEBUG_MEMORY_INIT is not set
2473# CONFIG_DEBUG_LIST is not set
2474# CONFIG_TEST_LIST_SORT is not set
2475# CONFIG_DEBUG_SG is not set
2476# CONFIG_DEBUG_NOTIFIERS is not set
2477# CONFIG_DEBUG_CREDENTIALS is not set
2478# CONFIG_BOOT_PRINTK_DELAY is not set
2479# CONFIG_RCU_TORTURE_TEST is not set
2480CONFIG_RCU_CPU_STALL_TIMEOUT=60
2481CONFIG_RCU_CPU_STALL_VERBOSE=y
2482# CONFIG_RCU_CPU_STALL_INFO is not set
2483# CONFIG_RCU_TRACE is not set
2484# CONFIG_BACKTRACE_SELF_TEST is not set
2485# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
2486# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
2487# CONFIG_DEBUG_PER_CPU_MAPS is not set
2488# CONFIG_LKDTM is not set
2489# CONFIG_NOTIFIER_ERROR_INJECTION is not set
2490# CONFIG_FAULT_INJECTION is not set
2491# CONFIG_DEBUG_PAGEALLOC is not set
2492CONFIG_HAVE_FUNCTION_TRACER=y
2493CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
2494CONFIG_HAVE_DYNAMIC_FTRACE=y
2495CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
2496CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
2497CONFIG_HAVE_C_RECORDMCOUNT=y
2498CONFIG_TRACING_SUPPORT=y
2499# CONFIG_FTRACE is not set
2500# CONFIG_RBTREE_TEST is not set
2501# CONFIG_INTERVAL_TREE_TEST is not set
2502CONFIG_DYNAMIC_DEBUG=y
2503# CONFIG_DMA_API_DEBUG is not set
2504# CONFIG_ATOMIC64_SELFTEST is not set
2505# CONFIG_SAMPLES is not set
2506CONFIG_HAVE_ARCH_KGDB=y
2507# CONFIG_KGDB is not set
2508# CONFIG_TEST_KSTRTOX is not set
2509# CONFIG_STRICT_DEVMEM is not set
2510CONFIG_ARM_UNWIND=y
2511# CONFIG_DEBUG_USER is not set
2512CONFIG_DEBUG_LL=y
2513# CONFIG_DEBUG_ZYNQ_UART0 is not set
2514CONFIG_DEBUG_ZYNQ_UART1=y
2515# CONFIG_DEBUG_VEXPRESS_UART0_DETECT is not set
2516# CONFIG_DEBUG_VEXPRESS_UART0_CA9 is not set
2517# CONFIG_DEBUG_VEXPRESS_UART0_RS1 is not set
2518# CONFIG_DEBUG_ICEDCC is not set
2519# CONFIG_DEBUG_SEMIHOSTING is not set
2520CONFIG_DEBUG_LL_INCLUDE="debug/zynq.S"
2521CONFIG_EARLY_PRINTK=y
2522# CONFIG_OC_ETM is not set
2523# CONFIG_PID_IN_CONTEXTIDR is not set
2524
2525#
2526# Security options
2527#
2528# CONFIG_KEYS is not set
2529# CONFIG_SECURITY_DMESG_RESTRICT is not set
2530# CONFIG_SECURITY is not set
2531# CONFIG_SECURITYFS is not set
2532CONFIG_DEFAULT_SECURITY_DAC=y
2533CONFIG_DEFAULT_SECURITY=""
2534CONFIG_CRYPTO=y
2535
2536#
2537# Crypto core or helper
2538#
2539# CONFIG_CRYPTO_FIPS is not set
2540CONFIG_CRYPTO_ALGAPI=y
2541CONFIG_CRYPTO_ALGAPI2=y
2542CONFIG_CRYPTO_HASH=y
2543CONFIG_CRYPTO_HASH2=y
2544CONFIG_CRYPTO_RNG=m
2545CONFIG_CRYPTO_RNG2=m
2546# CONFIG_CRYPTO_MANAGER is not set
2547# CONFIG_CRYPTO_MANAGER2 is not set
2548# CONFIG_CRYPTO_USER is not set
2549# CONFIG_CRYPTO_GF128MUL is not set
2550# CONFIG_CRYPTO_NULL is not set
2551# CONFIG_CRYPTO_PCRYPT is not set
2552# CONFIG_CRYPTO_CRYPTD is not set
2553# CONFIG_CRYPTO_AUTHENC is not set
2554# CONFIG_CRYPTO_TEST is not set
2555
2556#
2557# Authenticated Encryption with Associated Data
2558#
2559# CONFIG_CRYPTO_CCM is not set
2560# CONFIG_CRYPTO_GCM is not set
2561# CONFIG_CRYPTO_SEQIV is not set
2562
2563#
2564# Block modes
2565#
2566# CONFIG_CRYPTO_CBC is not set
2567# CONFIG_CRYPTO_CTR is not set
2568# CONFIG_CRYPTO_CTS is not set
2569# CONFIG_CRYPTO_ECB is not set
2570# CONFIG_CRYPTO_LRW is not set
2571# CONFIG_CRYPTO_PCBC is not set
2572# CONFIG_CRYPTO_XTS is not set
2573
2574#
2575# Hash modes
2576#
2577# CONFIG_CRYPTO_HMAC is not set
2578# CONFIG_CRYPTO_XCBC is not set
2579# CONFIG_CRYPTO_VMAC is not set
2580
2581#
2582# Digest
2583#
2584CONFIG_CRYPTO_CRC32C=y
2585# CONFIG_CRYPTO_GHASH is not set
2586# CONFIG_CRYPTO_MD4 is not set
2587# CONFIG_CRYPTO_MD5 is not set
2588# CONFIG_CRYPTO_MICHAEL_MIC is not set
2589# CONFIG_CRYPTO_RMD128 is not set
2590# CONFIG_CRYPTO_RMD160 is not set
2591# CONFIG_CRYPTO_RMD256 is not set
2592# CONFIG_CRYPTO_RMD320 is not set
2593# CONFIG_CRYPTO_SHA1 is not set
2594# CONFIG_CRYPTO_SHA1_ARM is not set
2595# CONFIG_CRYPTO_SHA256 is not set
2596# CONFIG_CRYPTO_SHA512 is not set
2597# CONFIG_CRYPTO_TGR192 is not set
2598# CONFIG_CRYPTO_WP512 is not set
2599
2600#
2601# Ciphers
2602#
2603CONFIG_CRYPTO_AES=y
2604# CONFIG_CRYPTO_AES_ARM is not set
2605# CONFIG_CRYPTO_ANUBIS is not set
2606# CONFIG_CRYPTO_ARC4 is not set
2607# CONFIG_CRYPTO_BLOWFISH is not set
2608# CONFIG_CRYPTO_CAMELLIA is not set
2609# CONFIG_CRYPTO_CAST5 is not set
2610# CONFIG_CRYPTO_CAST6 is not set
2611# CONFIG_CRYPTO_DES is not set
2612# CONFIG_CRYPTO_FCRYPT is not set
2613# CONFIG_CRYPTO_KHAZAD is not set
2614# CONFIG_CRYPTO_SALSA20 is not set
2615# CONFIG_CRYPTO_SEED is not set
2616# CONFIG_CRYPTO_SERPENT is not set
2617# CONFIG_CRYPTO_TEA is not set
2618# CONFIG_CRYPTO_TWOFISH is not set
2619
2620#
2621# Compression
2622#
2623# CONFIG_CRYPTO_DEFLATE is not set
2624# CONFIG_CRYPTO_ZLIB is not set
2625# CONFIG_CRYPTO_LZO is not set
2626
2627#
2628# Random Number Generation
2629#
2630CONFIG_CRYPTO_ANSI_CPRNG=m
2631# CONFIG_CRYPTO_USER_API_HASH is not set
2632# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
2633CONFIG_CRYPTO_HW=y
2634# CONFIG_CRYPTO_DEV_HIFN_795X is not set
2635# CONFIG_BINARY_PRINTF is not set
2636
2637#
2638# Library routines
2639#
2640CONFIG_BITREVERSE=y
2641CONFIG_GENERIC_STRNCPY_FROM_USER=y
2642CONFIG_GENERIC_STRNLEN_USER=y
2643CONFIG_GENERIC_PCI_IOMAP=y
2644CONFIG_GENERIC_IO=y
2645CONFIG_PERCPU_RWSEM=y
2646# CONFIG_CRC_CCITT is not set
2647CONFIG_CRC16=y
2648# CONFIG_CRC_T10DIF is not set
2649# CONFIG_CRC_ITU_T is not set
2650CONFIG_CRC32=y
2651# CONFIG_CRC32_SELFTEST is not set
2652CONFIG_CRC32_SLICEBY8=y
2653# CONFIG_CRC32_SLICEBY4 is not set
2654# CONFIG_CRC32_SARWATE is not set
2655# CONFIG_CRC32_BIT is not set
2656# CONFIG_CRC7 is not set
2657# CONFIG_LIBCRC32C is not set
2658# CONFIG_CRC8 is not set
2659CONFIG_ZLIB_INFLATE=y
2660CONFIG_ZLIB_DEFLATE=y
2661# CONFIG_XZ_DEC is not set
2662# CONFIG_XZ_DEC_BCJ is not set
2663CONFIG_DECOMPRESS_GZIP=y
2664CONFIG_HAS_IOMEM=y
2665CONFIG_HAS_DMA=y
2666CONFIG_CPU_RMAP=y
2667CONFIG_DQL=y
2668CONFIG_NLATTR=y
2669CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y
2670# CONFIG_AVERAGE is not set
2671# CONFIG_CORDIC is not set
2672# CONFIG_DDR is not set
diff --git a/recipes-kernel/linux/linux-xlnx/libtraceevent-Remove-hard-coded-include-to-usr-local.patch b/recipes-kernel/linux/linux-xlnx/libtraceevent-Remove-hard-coded-include-to-usr-local.patch
new file mode 100644
index 00000000..0168ea79
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx/libtraceevent-Remove-hard-coded-include-to-usr-local.patch
@@ -0,0 +1,38 @@
1From b9e8c37220c80e78289a1e87b50c09418eb59a7e Mon Sep 17 00:00:00 2001
2From: Jack Mitchell <jack.mitchell@dbbroadcast.co.uk>
3Date: Fri, 8 Mar 2013 11:21:52 +0000
4Subject: [PATCH] libtraceevent: Remove hard coded include to
5 /usr/local/include in Makefile
6
7Having /usr/local/include hardcoded into the makefile is not necessary,
8as this is automatically included by GCC. It also infects cross-compile
9builds with the host system's includes.
10
11Signed-off-by: Jack Mitchell <jack.mitchell@dbbroadcast.co.uk>
12Acked-by: Namhyung Kim <namhyung@kernel.org>
13Cc: Ingo Molnar <mingo@redhat.com>
14Cc: Paul Mackerras <paulus@samba.org>
15Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
16Link: http://lkml.kernel.org/r/1362741712-21308-1-git-send-email-ml@communistcode.co.uk
17Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
18Upstream-Status: Backport
19---
20 tools/lib/traceevent/Makefile | 2 +-
21 1 file changed, 1 insertion(+), 1 deletion(-)
22
23diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
24index a20e320..0b0a907 100644
25--- a/tools/lib/traceevent/Makefile
26+++ b/tools/lib/traceevent/Makefile
27@@ -122,7 +122,7 @@ export Q VERBOSE
28
29 EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
30
31-INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES)
32+INCLUDES = -I. $(CONFIG_INCLUDES)
33
34 # Set compile option CFLAGS if not set elsewhere
35 CFLAGS ?= -g -Wall
36--
371.7.9.5
38
diff --git a/recipes-kernel/linux/linux-xlnx/patch-3.8.11-rt8-xlnx.patch b/recipes-kernel/linux/linux-xlnx/patch-3.8.11-rt8-xlnx.patch
new file mode 100644
index 00000000..cc622a52
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx/patch-3.8.11-rt8-xlnx.patch
@@ -0,0 +1,26332 @@
1diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt
2new file mode 100644
3index 0000000..cb61516
4--- /dev/null
5+++ b/Documentation/hwlat_detector.txt
6@@ -0,0 +1,64 @@
7+Introduction:
8+-------------
9+
10+The module hwlat_detector is a special purpose kernel module that is used to
11+detect large system latencies induced by the behavior of certain underlying
12+hardware or firmware, independent of Linux itself. The code was developed
13+originally to detect SMIs (System Management Interrupts) on x86 systems,
14+however there is nothing x86 specific about this patchset. It was
15+originally written for use by the "RT" patch since the Real Time
16+kernel is highly latency sensitive.
17+
18+SMIs are usually not serviced by the Linux kernel, which typically does not
19+even know that they are occurring. SMIs are instead set up by BIOS code
20+and are serviced by BIOS code, usually for "critical" events such as
21+management of thermal sensors and fans. Sometimes though, SMIs are used for
22+other tasks and those tasks can spend an inordinate amount of time in the
23+handler (sometimes measured in milliseconds). Obviously this is a problem if
24+you are trying to keep event service latencies down in the microsecond range.
25+
26+The hardware latency detector works by hogging all of the cpus for configurable
27+amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
28+for some period, then looking for gaps in the TSC data. Any gap indicates a
29+time when the polling was interrupted; since the machine is stopped and
30+interrupts are turned off, the only thing that could cause such a gap is an SMI.
31+
32+Note that the SMI detector should *NEVER* be used in a production environment.
33+It is intended to be run manually to determine if the hardware platform has a
34+problem with long system firmware service routines.
35+
36+Usage:
37+------
38+
39+Loading the module hwlat_detector with the parameter "enabled=1" (or toggling
40+on the "enable" entry in the "hwlat_detector" debugfs directory) is the only
41+step required to start the hwlat_detector. It is possible to redefine the
42+threshold in microseconds (us) above which latency spikes will be taken
43+into account (parameter "threshold=").
44+
45+Example:
46+
47+ # modprobe hwlat_detector enabled=1 threshold=100
48+
49+After the module is loaded, it creates a directory named "hwlat_detector" under
50+the debugfs mountpoint, written as "/debug/hwlat_detector" in this text. It is
51+necessary to have debugfs mounted, which might be at /sys/debug on your system.
52+
53+The /debug/hwlat_detector interface contains the following files:
54+
55+count - number of latency spikes observed since last reset
56+enable - a global enable/disable toggle (0/1), resets count
57+max - maximum hardware latency actually observed (usecs)
58+sample - a pipe from which to read current raw sample data
59+ in the format <timestamp> <latency observed usecs>
60+ (can be opened O_NONBLOCK for a single sample)
61+threshold - minimum latency value to be considered (usecs)
62+width - time period to sample with CPUs held (usecs)
63+ must be less than the total window size (enforced)
64+window - total period of sampling, width being inside (usecs)
65+
66+By default we will set width to 500,000 and window to 1,000,000, meaning that
67+we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
68+observe any latencies that exceed the threshold (initially 100 usecs),
69+then we write to a global sample ring buffer of 8K samples, which is
70+consumed by reading from the "sample" (pipe) debugfs file interface.
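
A complete session might look like the following (a minimal sketch; it
assumes debugfs is mounted at /sys/kernel/debug, which may differ on your
system):

  mount -t debugfs nodev /sys/kernel/debug 2>/dev/null
  cd /sys/kernel/debug/hwlat_detector
  echo 100 > threshold   # report spikes of 100 usecs or more
  echo 1 > enable        # start sampling; this also resets count
  sleep 60               # let the detector watch the hardware for a while
  cat count max          # number of spikes seen and worst latency (usecs)
  cat sample             # blocks until the next raw sample is available
  echo 0 > enable        # stop sampling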
71diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
72index 986614d..1ba0afe 100644
73--- a/Documentation/kernel-parameters.txt
74+++ b/Documentation/kernel-parameters.txt
75@@ -1182,6 +1182,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
76 See comment before ip2_setup() in
77 drivers/char/ip2/ip2base.c.
78
79+ irqaffinity= [SMP] Set the default irq affinity mask
80+ Format:
81+ <cpu number>,...,<cpu number>
82+ or
83+ <cpu number>-<cpu number>
84+ (must be a positive range in ascending order)
85+ or a mixture
86+ <cpu number>,...,<cpu number>-<cpu number>
87+
88 irqfixup [HW]
89 When an interrupt is not handled search all handlers
90 for it. Intended to get systems with badly broken
91diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
92index 2a4cdda..67ac78c 100644
93--- a/Documentation/sysrq.txt
94+++ b/Documentation/sysrq.txt
95@@ -57,10 +57,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>,
96 On other - If you know of the key combos for other architectures, please
97 let me know so I can add them to this section.
98
99-On all - write a character to /proc/sysrq-trigger. e.g.:
100-
101+On all - write a character to /proc/sysrq-trigger, e.g.:
102 echo t > /proc/sysrq-trigger
103
104+On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
105+ echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
106+ Send an ICMP echo request with this pattern plus the particular
107+ SysRq command key. Example:
108+ # ping -c1 -s57 -p0102030468
109+ will trigger the SysRq-H (help) command.
110+
111+
112 * What are the 'command' keys?
113 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
114 'b' - Will immediately reboot the system without syncing or unmounting
115diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt
116new file mode 100644
117index 0000000..6f2aeab
118--- /dev/null
119+++ b/Documentation/trace/histograms.txt
120@@ -0,0 +1,186 @@
121+ Using the Linux Kernel Latency Histograms
122+
123+
124+This document gives a short explanation of how to enable, configure and use
125+latency histograms. Latency histograms are primarily relevant in the
126+context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
127+and are used in the quality management of the Linux real-time
128+capabilities.
129+
130+
131+* Purpose of latency histograms
132+
133+A latency histogram continuously accumulates the frequencies of latency
134+data. There are two types of histograms:
135+- potential sources of latencies
136+- effective latencies
137+
138+
139+* Potential sources of latencies
140+
141+Potential sources of latencies are code segments where interrupts,
142+preemption or both are disabled (aka critical sections). To create
143+histograms of potential sources of latency, the kernel stores the time
144+stamp at the start of a critical section, determines the time elapsed
145+when the end of the section is reached, and increments the frequency
146+counter of that latency value - irrespective of whether any concurrently
147+running process is affected by latency or not.
148+- Configuration items (in the Kernel hacking/Tracers submenu)
149+ CONFIG_INTERRUPT_OFF_LATENCY
150+ CONFIG_PREEMPT_OFF_LATENCY
151+
152+
153+* Effective latencies
154+
155+Effective latencies actually occur during the wakeup of a process. To
156+determine effective latencies, the kernel stores the time stamp when a
157+process is scheduled to be woken up, and determines the duration of the
158+wakeup time shortly before control is passed over to this process. Note
159+that the apparent latency in user space may be somewhat longer, since the
160+process may be interrupted after control is passed over to it but before
161+the execution in user space takes place. Simply measuring the interval
162+between enqueuing and wakeup may also not be appropriate in cases when a
163+process is scheduled as a result of a timer expiration. The timer may have
164+missed its deadline, e.g. due to disabled interrupts, but this latency
165+would not be registered. Therefore, the offsets of missed timers are
166+recorded in a separate histogram. If both wakeup latency and missed timer
167+offsets are configured and enabled, a third histogram may be enabled that
168+records the overall latency as a sum of the timer latency, if any, and the
169+wakeup latency. This histogram is called "timerandwakeup".
170+- Configuration items (in the Kernel hacking/Tracers submenu)
171+ CONFIG_WAKEUP_LATENCY
172+ CONFIG_MISSED_TIMER_OFFSETS
173+
174+
175+* Usage
176+
177+The interface to the administration of the latency histograms is located
178+in the debugfs file system. To mount it, either enter
179+
180+mount -t sysfs nodev /sys
181+mount -t debugfs nodev /sys/kernel/debug
182+
183+from shell command line level, or add
184+
185+nodev /sys sysfs defaults 0 0
186+nodev /sys/kernel/debug debugfs defaults 0 0
187+
188+to the file /etc/fstab. All latency histogram related files are then
189+available in the directory /sys/kernel/debug/tracing/latency_hist. A
190+particular histogram type is enabled by writing non-zero to the related
191+variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
192+Select "preemptirqsoff" for the histograms of potential sources of
193+latencies and "wakeup" for histograms of effective latencies etc. The
194+histogram data - one per CPU - are available in the files
195+
196+/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
197+/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
198+/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
199+/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
200+/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
201+/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
202+/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
203+
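For example, the wakeup histograms can be switched on and the data of the
first CPU inspected as follows (a minimal sketch, assuming debugfs is
mounted as shown above):

  echo 1 > /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  cat /sys/kernel/debug/tracing/latency_hist/wakeup/CPU0
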
204+The histograms are reset by writing non-zero to the file "reset" in a
205+particular latency directory. To reset all latency data, use
206+
207+#!/bin/sh
208+
209+TRACINGDIR=/sys/kernel/debug/tracing
210+HISTDIR=$TRACINGDIR/latency_hist
211+
212+if test -d $HISTDIR
213+then
214+ cd $HISTDIR
215+ for i in `find . | grep /reset$`
216+ do
217+ echo 1 >$i
218+ done
219+fi
220+
221+
222+* Data format
223+
224+Latency data are stored with a resolution of one microsecond. The
225+maximum latency is 10,240 microseconds. The data are only valid if the
226+overflow register is empty. Every output line contains the latency in
227+microseconds in the first column and the number of samples in the second
228+column. To display only lines with a positive latency count, use, for
229+example,
230+
231+grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
232+
233+#Minimum latency: 0 microseconds.
234+#Average latency: 0 microseconds.
235+#Maximum latency: 25 microseconds.
236+#Total samples: 3104770694
237+#There are 0 samples greater or equal than 10240 microseconds
238+#usecs samples
239+ 0 2984486876
240+ 1 49843506
241+ 2 58219047
242+ 3 5348126
243+ 4 2187960
244+ 5 3388262
245+ 6 959289
246+ 7 208294
247+ 8 40420
248+ 9 4485
249+ 10 14918
250+ 11 18340
251+ 12 25052
252+ 13 19455
253+ 14 5602
254+ 15 969
255+ 16 47
256+ 17 18
257+ 18 14
258+ 19 1
259+ 20 3
260+ 21 2
261+ 22 5
262+ 23 2
263+ 25 1
264+
265+
266+* Wakeup latency of a selected process
267+
268+To only collect wakeup latency data of a particular process, write the
269+PID of the requested process to
270+
271+/sys/kernel/debug/tracing/latency_hist/wakeup/pid
272+
273+PIDs are not considered if this variable is set to 0.
274+
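For example, to restrict the wakeup histograms to a single task and later
remove the filter again (a minimal sketch that assumes the PID of interest
is 1234):

  echo 1234 > /sys/kernel/debug/tracing/latency_hist/wakeup/pid
  echo 0 > /sys/kernel/debug/tracing/latency_hist/wakeup/pid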
275+
276+* Details of the process with the highest wakeup latency so far
277+
278+Selected data of the process that suffered from the highest wakeup
279+latency that occurred in a particular CPU are available in the file
280+
281+/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
282+
283+In addition, other relevant system data at the time when the
284+latency occurred are given.
285+
286+The format of the data is (all in one line):
287+<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
288+<- <PID> <Priority> <Command> <Timestamp>
289+
290+The value of <Timeroffset> is only relevant in the combined timer
291+and wakeup latency recording. In the wakeup recording, it is
292+always 0, in the missed_timer_offsets recording, it is the same
293+as <Latency>.
294+
295+When searching retrospectively for the origin of a latency that occurred
296+while tracing was not enabled, it may be helpful to know the name and some
297+basic data of the task that (finally) switched to the late real-time task.
298+In addition to the victim's data, the data of the possible culprit are
299+therefore also displayed after the
300+"<-" symbol.
301+
302+Finally, the timestamp of when the latency occurred, given as
303+<seconds>.<microseconds> after the most recent system boot,
304+is provided.
305+
306+These data are also reset when the wakeup histogram is reset.
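
For example, the worst case recorded so far on CPU 0 can be inspected with
(a minimal sketch):

  cat /sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPU0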
307diff --git a/arch/Kconfig b/arch/Kconfig
308index 7f8f281..4a93e44 100644
309--- a/arch/Kconfig
310+++ b/arch/Kconfig
311@@ -6,6 +6,7 @@ config OPROFILE
312 tristate "OProfile system profiling"
313 depends on PROFILING
314 depends on HAVE_OPROFILE
315+ depends on !PREEMPT_RT_FULL
316 select RING_BUFFER
317 select RING_BUFFER_ALLOW_SWAP
318 help
319diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
320index 0c4132d..fb9eaa4 100644
321--- a/arch/alpha/mm/fault.c
322+++ b/arch/alpha/mm/fault.c
323@@ -108,7 +108,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
324
325 /* If we're in an interrupt context, or have no user context,
326 we must not take the fault. */
327- if (!mm || in_atomic())
328+ if (!mm || pagefault_disabled())
329 goto no_context;
330
331 #ifdef CONFIG_ALPHA_LARGE_VMALLOC
332diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
333index 5e86f1e..875b46d 100644
334--- a/arch/arm/Kconfig
335+++ b/arch/arm/Kconfig
336@@ -17,6 +17,7 @@ config ARM
337 select GENERIC_STRNCPY_FROM_USER
338 select GENERIC_STRNLEN_USER
339 select HARDIRQS_SW_RESEND
340+ select IRQ_FORCED_THREADING
341 select HAVE_AOUT
342 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
343 select HAVE_ARCH_KGDB
344@@ -46,6 +47,7 @@ config ARM
345 select HAVE_MEMBLOCK
346 select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
347 select HAVE_PERF_EVENTS
348+ select HAVE_PREEMPT_LAZY
349 select HAVE_REGS_AND_STACK_ACCESS_API
350 select HAVE_SYSCALL_TRACEPOINTS
351 select HAVE_UID16
352diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
353index c2f14e8..91fe4f1 100644
354--- a/arch/arm/boot/dts/am33xx.dtsi
355+++ b/arch/arm/boot/dts/am33xx.dtsi
356@@ -349,7 +349,7 @@
357 rx_descs = <64>;
358 mac_control = <0x20>;
359 slaves = <2>;
360- cpts_active_slave = <0>;
361+ active_slave = <0>;
362 cpts_clock_mult = <0x80000000>;
363 cpts_clock_shift = <29>;
364 reg = <0x4a100000 0x800
365@@ -385,5 +385,19 @@
366 mac-address = [ 00 00 00 00 00 00 ];
367 };
368 };
369+
370+ ocmcram: ocmcram@40300000 {
371+ compatible = "ti,am3352-ocmcram";
372+ reg = <0x40300000 0x10000>;
373+ ti,hwmods = "ocmcram";
374+ ti,no_idle_on_suspend;
375+ };
376+
377+ wkup_m3: wkup_m3@44d00000 {
378+ compatible = "ti,am3353-wkup-m3";
379+ reg = <0x44d00000 0x4000 /* M3 UMEM */
380+ 0x44d80000 0x2000>; /* M3 DMEM */
381+ ti,hwmods = "wkup_m3";
382+ };
383 };
384 };
385diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
386index fa09e6b..fbd0ba7 100644
387--- a/arch/arm/include/asm/switch_to.h
388+++ b/arch/arm/include/asm/switch_to.h
389@@ -3,6 +3,14 @@
390
391 #include <linux/thread_info.h>
392
393+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
394+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
395+#else
396+static inline void
397+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
398+#endif
399+
400+
401 /*
402 * switch_to(prev, next) should switch from task `prev' to `next'
403 * `prev' will never be the same as `next'. schedule() itself
404@@ -12,6 +20,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
405
406 #define switch_to(prev,next,last) \
407 do { \
408+ switch_kmaps(prev, next); \
409 last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
410 } while (0)
411
412diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
413index cddda1f..4acacb2 100644
414--- a/arch/arm/include/asm/thread_info.h
415+++ b/arch/arm/include/asm/thread_info.h
416@@ -50,6 +50,7 @@ struct cpu_context_save {
417 struct thread_info {
418 unsigned long flags; /* low level flags */
419 int preempt_count; /* 0 => preemptable, <0 => bug */
420+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
421 mm_segment_t addr_limit; /* address limit */
422 struct task_struct *task; /* main task structure */
423 struct exec_domain *exec_domain; /* execution domain */
424@@ -148,6 +149,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
425 #define TIF_SIGPENDING 0
426 #define TIF_NEED_RESCHED 1
427 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
428+#define TIF_NEED_RESCHED_LAZY 3
429 #define TIF_SYSCALL_TRACE 8
430 #define TIF_SYSCALL_AUDIT 9
431 #define TIF_SYSCALL_TRACEPOINT 10
432@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
433 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
434 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
435 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
436+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
437 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
438 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
439 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
440diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
441index cf10d18..8b1d153 100644
442--- a/arch/arm/kernel/asm-offsets.c
443+++ b/arch/arm/kernel/asm-offsets.c
444@@ -50,6 +50,7 @@ int main(void)
445 BLANK();
446 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
447 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
448+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
449 DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
450 DEFINE(TI_TASK, offsetof(struct thread_info, task));
451 DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
452diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c
453index 85aa2b2..4307653 100644
454--- a/arch/arm/kernel/early_printk.c
455+++ b/arch/arm/kernel/early_printk.c
456@@ -29,28 +29,17 @@ static void early_console_write(struct console *con, const char *s, unsigned n)
457 early_write(s, n);
458 }
459
460-static struct console early_console = {
461+static struct console early_console_dev = {
462 .name = "earlycon",
463 .write = early_console_write,
464 .flags = CON_PRINTBUFFER | CON_BOOT,
465 .index = -1,
466 };
467
468-asmlinkage void early_printk(const char *fmt, ...)
469-{
470- char buf[512];
471- int n;
472- va_list ap;
473-
474- va_start(ap, fmt);
475- n = vscnprintf(buf, sizeof(buf), fmt, ap);
476- early_write(buf, n);
477- va_end(ap);
478-}
479-
480 static int __init setup_early_printk(char *buf)
481 {
482- register_console(&early_console);
483+ early_console = &early_console_dev;
484+ register_console(&early_console_dev);
485 return 0;
486 }
487
488diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
489index 442763d..4cd7d58b 100644
490--- a/arch/arm/kernel/entry-armv.S
491+++ b/arch/arm/kernel/entry-armv.S
492@@ -216,11 +216,18 @@ __irq_svc:
493 #ifdef CONFIG_PREEMPT
494 get_thread_info tsk
495 ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
496- ldr r0, [tsk, #TI_FLAGS] @ get flags
497 teq r8, #0 @ if preempt count != 0
498+ bne 1f @ return from exception
499+ ldr r0, [tsk, #TI_FLAGS] @ get flags
500+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
501+ blne svc_preempt @ preempt!
502+
503+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
504+ teq r8, #0 @ if preempt lazy count != 0
505 movne r0, #0 @ force flags to 0
506- tst r0, #_TIF_NEED_RESCHED
507+ tst r0, #_TIF_NEED_RESCHED_LAZY
508 blne svc_preempt
509+1:
510 #endif
511
512 #ifdef CONFIG_TRACE_IRQFLAGS
513@@ -240,6 +247,8 @@ svc_preempt:
514 1: bl preempt_schedule_irq @ irq en/disable is done inside
515 ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
516 tst r0, #_TIF_NEED_RESCHED
517+ bne 1b
518+ tst r0, #_TIF_NEED_RESCHED_LAZY
519 moveq pc, r8 @ go again
520 b 1b
521 #endif
522diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
523index 5f66206..aa1b171 100644
524--- a/arch/arm/kernel/perf_event_cpu.c
525+++ b/arch/arm/kernel/perf_event_cpu.c
526@@ -118,7 +118,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
527 continue;
528 }
529
530- err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
531+ err = request_irq(irq, handler,
532+ IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
533 cpu_pmu);
534 if (err) {
535 pr_err("unable to request IRQ%d for ARM PMU counters\n",
536diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
537index c6dec5f..43ac178 100644
538--- a/arch/arm/kernel/process.c
539+++ b/arch/arm/kernel/process.c
540@@ -459,6 +459,31 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
541 }
542
543 #ifdef CONFIG_MMU
544+
545+/*
546+ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
547+ * initialized by pgtable_page_ctor() then a coredump of the vector page will
548+ * fail.
549+ */
550+static int __init vectors_user_mapping_init_page(void)
551+{
552+ struct page *page;
553+ unsigned long addr = 0xffff0000;
554+ pgd_t *pgd;
555+ pud_t *pud;
556+ pmd_t *pmd;
557+
558+ pgd = pgd_offset_k(addr);
559+ pud = pud_offset(pgd, addr);
560+ pmd = pmd_offset(pud, addr);
561+ page = pmd_page(*(pmd));
562+
563+ pgtable_page_ctor(page);
564+
565+ return 0;
566+}
567+late_initcall(vectors_user_mapping_init_page);
568+
569 /*
570 * The vectors page is always readable from user space for the
571 * atomic helpers and the signal restart code. Insert it into the
572diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
573index 56f72d2..806416a 100644
574--- a/arch/arm/kernel/signal.c
575+++ b/arch/arm/kernel/signal.c
576@@ -638,7 +638,8 @@ asmlinkage int
577 do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
578 {
579 do {
580- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
581+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
582+ _TIF_NEED_RESCHED_LAZY))) {
583 schedule();
584 } else {
585 if (unlikely(!user_mode(regs)))
586diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
587index cafe988..0c36dcd 100644
588--- a/arch/arm/mach-at91/at91rm9200_time.c
589+++ b/arch/arm/mach-at91/at91rm9200_time.c
590@@ -134,6 +134,7 @@ clkevt32k_mode(enum clock_event_mode mode, struct clock_event_device *dev)
591 break;
592 case CLOCK_EVT_MODE_SHUTDOWN:
593 case CLOCK_EVT_MODE_UNUSED:
594+ remove_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
595 case CLOCK_EVT_MODE_RESUME:
596 irqmask = 0;
597 break;
598diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
599index 358412f..a6f9751 100644
600--- a/arch/arm/mach-at91/at91sam926x_time.c
601+++ b/arch/arm/mach-at91/at91sam926x_time.c
602@@ -77,7 +77,7 @@ static struct clocksource pit_clk = {
603 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
604 };
605
606-
607+static struct irqaction at91sam926x_pit_irq;
608 /*
609 * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
610 */
611@@ -86,6 +86,8 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev)
612 {
613 switch (mode) {
614 case CLOCK_EVT_MODE_PERIODIC:
615+ /* Set up irq handler */
616+ setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
617 /* update clocksource counter */
618 pit_cnt += pit_cycle * PIT_PICNT(pit_read(AT91_PIT_PIVR));
619 pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN
620@@ -98,6 +100,7 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev)
621 case CLOCK_EVT_MODE_UNUSED:
622 /* disable irq, leaving the clocksource active */
623 pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN);
624+ remove_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
625 break;
626 case CLOCK_EVT_MODE_RESUME:
627 break;
628diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
629index c5c840e..d044812 100644
630--- a/arch/arm/mach-exynos/platsmp.c
631+++ b/arch/arm/mach-exynos/platsmp.c
632@@ -71,7 +71,7 @@ static void __iomem *scu_base_addr(void)
633 return (void __iomem *)(S5P_VA_SCU);
634 }
635
636-static DEFINE_SPINLOCK(boot_lock);
637+static DEFINE_RAW_SPINLOCK(boot_lock);
638
639 static void __cpuinit exynos_secondary_init(unsigned int cpu)
640 {
641@@ -91,8 +91,8 @@ static void __cpuinit exynos_secondary_init(unsigned int cpu)
642 /*
643 * Synchronise with the boot thread.
644 */
645- spin_lock(&boot_lock);
646- spin_unlock(&boot_lock);
647+ raw_spin_lock(&boot_lock);
648+ raw_spin_unlock(&boot_lock);
649 }
650
651 static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
652@@ -104,7 +104,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct
653 * Set synchronisation state between this boot processor
654 * and the secondary one
655 */
656- spin_lock(&boot_lock);
657+ raw_spin_lock(&boot_lock);
658
659 /*
660 * The secondary processor is waiting to be released from
661@@ -133,7 +133,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct
662
663 if (timeout == 0) {
664 printk(KERN_ERR "cpu1 power enable failed");
665- spin_unlock(&boot_lock);
666+ raw_spin_unlock(&boot_lock);
667 return -ETIMEDOUT;
668 }
669 }
670@@ -161,7 +161,7 @@ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct
671 * now the secondary core is starting up let it run its
672 * calibrations, then wait for it to finish
673 */
674- spin_unlock(&boot_lock);
675+ raw_spin_unlock(&boot_lock);
676
677 return pen_release != -1 ? -ENOSYS : 0;
678 }
679diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c
680index 7ed69b69..7b8043a 100644
681--- a/arch/arm/mach-msm/platsmp.c
682+++ b/arch/arm/mach-msm/platsmp.c
683@@ -31,7 +31,7 @@
684
685 extern void msm_secondary_startup(void);
686
687-static DEFINE_SPINLOCK(boot_lock);
688+static DEFINE_RAW_SPINLOCK(boot_lock);
689
690 static inline int get_core_count(void)
691 {
692@@ -58,8 +58,8 @@ static void __cpuinit msm_secondary_init(unsigned int cpu)
693 /*
694 * Synchronise with the boot thread.
695 */
696- spin_lock(&boot_lock);
697- spin_unlock(&boot_lock);
698+ raw_spin_lock(&boot_lock);
699+ raw_spin_unlock(&boot_lock);
700 }
701
702 static __cpuinit void prepare_cold_cpu(unsigned int cpu)
703@@ -96,7 +96,7 @@ static int __cpuinit msm_boot_secondary(unsigned int cpu, struct task_struct *id
704 * set synchronisation state between this boot processor
705 * and the secondary one
706 */
707- spin_lock(&boot_lock);
708+ raw_spin_lock(&boot_lock);
709
710 /*
711 * The secondary processor is waiting to be released from
712@@ -130,7 +130,7 @@ static int __cpuinit msm_boot_secondary(unsigned int cpu, struct task_struct *id
713 * now the secondary core is starting up let it run its
714 * calibrations, then wait for it to finish
715 */
716- spin_unlock(&boot_lock);
717+ raw_spin_unlock(&boot_lock);
718
719 return pen_release != -1 ? -ENOSYS : 0;
720 }
721diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
722index cd42d92..ef1fbcc 100644
723--- a/arch/arm/mach-omap2/omap-smp.c
724+++ b/arch/arm/mach-omap2/omap-smp.c
725@@ -45,7 +45,7 @@ u16 pm44xx_errata;
726 /* SCU base address */
727 static void __iomem *scu_base;
728
729-static DEFINE_SPINLOCK(boot_lock);
730+static DEFINE_RAW_SPINLOCK(boot_lock);
731
732 void __iomem *omap4_get_scu_base(void)
733 {
734@@ -76,8 +76,8 @@ static void __cpuinit omap4_secondary_init(unsigned int cpu)
735 /*
736 * Synchronise with the boot thread.
737 */
738- spin_lock(&boot_lock);
739- spin_unlock(&boot_lock);
740+ raw_spin_lock(&boot_lock);
741+ raw_spin_unlock(&boot_lock);
742 }
743
744 static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
745@@ -90,7 +90,7 @@ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *
746 * Set synchronisation state between this boot processor
747 * and the secondary one
748 */
749- spin_lock(&boot_lock);
750+ raw_spin_lock(&boot_lock);
751
752 /*
753 * Update the AuxCoreBoot0 with boot state for secondary core.
754@@ -163,7 +163,7 @@ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *
755 * Now the secondary core is starting up let it run its
756 * calibrations, then wait for it to finish
757 */
758- spin_unlock(&boot_lock);
759+ raw_spin_unlock(&boot_lock);
760
761 return 0;
762 }
763diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
764index 5d3b4f4..8633a43 100644
765--- a/arch/arm/mach-omap2/omap-wakeupgen.c
766+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
767@@ -46,7 +46,7 @@
768
769 static void __iomem *wakeupgen_base;
770 static void __iomem *sar_base;
771-static DEFINE_SPINLOCK(wakeupgen_lock);
772+static DEFINE_RAW_SPINLOCK(wakeupgen_lock);
773 static unsigned int irq_target_cpu[MAX_IRQS];
774 static unsigned int irq_banks = MAX_NR_REG_BANKS;
775 static unsigned int max_irqs = MAX_IRQS;
776@@ -134,9 +134,9 @@ static void wakeupgen_mask(struct irq_data *d)
777 {
778 unsigned long flags;
779
780- spin_lock_irqsave(&wakeupgen_lock, flags);
781+ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
782 _wakeupgen_clear(d->irq, irq_target_cpu[d->irq]);
783- spin_unlock_irqrestore(&wakeupgen_lock, flags);
784+ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
785 }
786
787 /*
788@@ -146,9 +146,9 @@ static void wakeupgen_unmask(struct irq_data *d)
789 {
790 unsigned long flags;
791
792- spin_lock_irqsave(&wakeupgen_lock, flags);
793+ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
794 _wakeupgen_set(d->irq, irq_target_cpu[d->irq]);
795- spin_unlock_irqrestore(&wakeupgen_lock, flags);
796+ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
797 }
798
799 #ifdef CONFIG_HOTPLUG_CPU
800@@ -189,7 +189,7 @@ static void wakeupgen_irqmask_all(unsigned int cpu, unsigned int set)
801 {
802 unsigned long flags;
803
804- spin_lock_irqsave(&wakeupgen_lock, flags);
805+ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
806 if (set) {
807 _wakeupgen_save_masks(cpu);
808 _wakeupgen_set_all(cpu, WKG_MASK_ALL);
809@@ -197,7 +197,7 @@ static void wakeupgen_irqmask_all(unsigned int cpu, unsigned int set)
810 _wakeupgen_set_all(cpu, WKG_UNMASK_ALL);
811 _wakeupgen_restore_masks(cpu);
812 }
813- spin_unlock_irqrestore(&wakeupgen_lock, flags);
814+ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
815 }
816 #endif
817
818diff --git a/arch/arm/mach-spear13xx/platsmp.c b/arch/arm/mach-spear13xx/platsmp.c
819index 2eaa3fa..bb61b1f 100644
820--- a/arch/arm/mach-spear13xx/platsmp.c
821+++ b/arch/arm/mach-spear13xx/platsmp.c
822@@ -21,7 +21,7 @@
823 #include <mach/spear.h>
824 #include <mach/generic.h>
825
826-static DEFINE_SPINLOCK(boot_lock);
827+static DEFINE_RAW_SPINLOCK(boot_lock);
828
829 static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
830
831@@ -44,8 +44,8 @@ static void __cpuinit spear13xx_secondary_init(unsigned int cpu)
832 /*
833 * Synchronise with the boot thread.
834 */
835- spin_lock(&boot_lock);
836- spin_unlock(&boot_lock);
837+ raw_spin_lock(&boot_lock);
838+ raw_spin_unlock(&boot_lock);
839 }
840
841 static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
842@@ -56,7 +56,7 @@ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_stru
843 * set synchronisation state between this boot processor
844 * and the secondary one
845 */
846- spin_lock(&boot_lock);
847+ raw_spin_lock(&boot_lock);
848
849 /*
850 * The secondary processor is waiting to be released from
851@@ -83,7 +83,7 @@ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_stru
852 * now the secondary core is starting up let it run its
853 * calibrations, then wait for it to finish
854 */
855- spin_unlock(&boot_lock);
856+ raw_spin_unlock(&boot_lock);
857
858 return pen_release != -1 ? -ENOSYS : 0;
859 }
860diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
861index 3db7782..dcecae1 100644
862--- a/arch/arm/mach-ux500/platsmp.c
863+++ b/arch/arm/mach-ux500/platsmp.c
864@@ -50,7 +50,7 @@ static void __iomem *scu_base_addr(void)
865 return NULL;
866 }
867
868-static DEFINE_SPINLOCK(boot_lock);
869+static DEFINE_RAW_SPINLOCK(boot_lock);
870
871 static void __cpuinit ux500_secondary_init(unsigned int cpu)
872 {
873@@ -70,8 +70,8 @@ static void __cpuinit ux500_secondary_init(unsigned int cpu)
874 /*
875 * Synchronise with the boot thread.
876 */
877- spin_lock(&boot_lock);
878- spin_unlock(&boot_lock);
879+ raw_spin_lock(&boot_lock);
880+ raw_spin_unlock(&boot_lock);
881 }
882
883 static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
884@@ -82,7 +82,7 @@ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *
885 * set synchronisation state between this boot processor
886 * and the secondary one
887 */
888- spin_lock(&boot_lock);
889+ raw_spin_lock(&boot_lock);
890
891 /*
892 * The secondary processor is waiting to be released from
893@@ -103,7 +103,7 @@ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *
894 * now the secondary core is starting up let it run its
895 * calibrations, then wait for it to finish
896 */
897- spin_unlock(&boot_lock);
898+ raw_spin_unlock(&boot_lock);
899
900 return pen_release != -1 ? -ENOSYS : 0;
901 }
902diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
903index 5dbf13f..149bab5 100644
904--- a/arch/arm/mm/fault.c
905+++ b/arch/arm/mm/fault.c
906@@ -279,7 +279,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
907 * If we're in an interrupt or have no user
908 * context, we must not take the fault..
909 */
910- if (in_atomic() || !mm)
911+ if (!mm || pagefault_disabled())
912 goto no_context;
913
914 /*
915diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
916index 21b9e1b..bd41dd8 100644
917--- a/arch/arm/mm/highmem.c
918+++ b/arch/arm/mm/highmem.c
919@@ -38,6 +38,7 @@ EXPORT_SYMBOL(kunmap);
920
921 void *kmap_atomic(struct page *page)
922 {
923+ pte_t pte = mk_pte(page, kmap_prot);
924 unsigned int idx;
925 unsigned long vaddr;
926 void *kmap;
927@@ -76,7 +77,10 @@ void *kmap_atomic(struct page *page)
928 * in place, so the contained TLB flush ensures the TLB is updated
929 * with the new mapping.
930 */
931- set_top_pte(vaddr, mk_pte(page, kmap_prot));
932+#ifdef CONFIG_PREEMPT_RT_FULL
933+ current->kmap_pte[type] = pte;
934+#endif
935+ set_top_pte(vaddr, pte);
936
937 return (void *)vaddr;
938 }
939@@ -93,12 +97,15 @@ void __kunmap_atomic(void *kvaddr)
940
941 if (cache_is_vivt())
942 __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
943+#ifdef CONFIG_PREEMPT_RT_FULL
944+ current->kmap_pte[type] = __pte(0);
945+#endif
946 #ifdef CONFIG_DEBUG_HIGHMEM
947 BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
948- set_top_pte(vaddr, __pte(0));
949 #else
950 (void) idx; /* to kill a warning */
951 #endif
952+ set_top_pte(vaddr, __pte(0));
953 kmap_atomic_idx_pop();
954 } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
955 /* this address was obtained through kmap_high_get() */
956@@ -110,6 +117,7 @@ EXPORT_SYMBOL(__kunmap_atomic);
957
958 void *kmap_atomic_pfn(unsigned long pfn)
959 {
960+ pte_t pte = pfn_pte(pfn, kmap_prot);
961 unsigned long vaddr;
962 int idx, type;
963
964@@ -121,7 +129,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
965 #ifdef CONFIG_DEBUG_HIGHMEM
966 BUG_ON(!pte_none(get_top_pte(vaddr)));
967 #endif
968- set_top_pte(vaddr, pfn_pte(pfn, kmap_prot));
969+#ifdef CONFIG_PREEMPT_RT_FULL
970+ current->kmap_pte[type] = pte;
971+#endif
972+ set_top_pte(vaddr, pte);
973
974 return (void *)vaddr;
975 }
976@@ -135,3 +146,29 @@ struct page *kmap_atomic_to_page(const void *ptr)
977
978 return pte_page(get_top_pte(vaddr));
979 }
980+
981+#if defined CONFIG_PREEMPT_RT_FULL
982+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
983+{
984+ int i;
985+
986+ /*
987+ * Clear @prev's kmap_atomic mappings
988+ */
989+ for (i = 0; i < prev_p->kmap_idx; i++) {
990+ int idx = i + KM_TYPE_NR * smp_processor_id();
991+
992+ set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), __pte(0));
993+ }
994+ /*
995+ * Restore @next_p's kmap_atomic mappings
996+ */
997+ for (i = 0; i < next_p->kmap_idx; i++) {
998+ int idx = i + KM_TYPE_NR * smp_processor_id();
999+
1000+ if (!pte_none(next_p->kmap_pte[i]))
1001+ set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx),
1002+ next_p->kmap_pte[i]);
1003+ }
1004+}
1005+#endif
1006diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
1007index 04ca493..20f9044 100644
1008--- a/arch/arm/plat-versatile/platsmp.c
1009+++ b/arch/arm/plat-versatile/platsmp.c
1010@@ -32,7 +32,7 @@ static void __cpuinit write_pen_release(int val)
1011 outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
1012 }
1013
1014-static DEFINE_SPINLOCK(boot_lock);
1015+static DEFINE_RAW_SPINLOCK(boot_lock);
1016
1017 void __cpuinit versatile_secondary_init(unsigned int cpu)
1018 {
1019@@ -52,8 +52,8 @@ void __cpuinit versatile_secondary_init(unsigned int cpu)
1020 /*
1021 * Synchronise with the boot thread.
1022 */
1023- spin_lock(&boot_lock);
1024- spin_unlock(&boot_lock);
1025+ raw_spin_lock(&boot_lock);
1026+ raw_spin_unlock(&boot_lock);
1027 }
1028
1029 int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
1030@@ -64,7 +64,7 @@ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idl
1031 * Set synchronisation state between this boot processor
1032 * and the secondary one
1033 */
1034- spin_lock(&boot_lock);
1035+ raw_spin_lock(&boot_lock);
1036
1037 /*
1038 * This is really belt and braces; we hold unintended secondary
1039@@ -94,7 +94,7 @@ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idl
1040 * now the secondary core is starting up let it run its
1041 * calibrations, then wait for it to finish
1042 */
1043- spin_unlock(&boot_lock);
1044+ raw_spin_unlock(&boot_lock);
1045
1046 return pen_release != -1 ? -ENOSYS : 0;
1047 }
1048diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
1049index b2f2d2d..9577e69 100644
1050--- a/arch/avr32/mm/fault.c
1051+++ b/arch/avr32/mm/fault.c
1052@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
1053 * If we're in an interrupt or have no user context, we must
1054 * not take the fault...
1055 */
1056- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
1057+ if (!mm || regs->sr & SYSREG_BIT(GM) || pagefault_disabled())
1058 goto no_context;
1059
1060 local_irq_enable();
1061diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c
1062index 84ed837..61fbd2d 100644
1063--- a/arch/blackfin/kernel/early_printk.c
1064+++ b/arch/blackfin/kernel/early_printk.c
1065@@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_init(unsigned int port,
1066 extern struct console *bfin_jc_early_init(void);
1067 #endif
1068
1069-static struct console *early_console;
1070-
1071 /* Default console */
1072 #define DEFAULT_PORT 0
1073 #define DEFAULT_CFLAG CS8|B57600
1074diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
1075index 73312ab..1a403d9 100644
1076--- a/arch/cris/mm/fault.c
1077+++ b/arch/cris/mm/fault.c
1078@@ -114,7 +114,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
1079 * user context, we must not take the fault.
1080 */
1081
1082- if (in_atomic() || !mm)
1083+ if (!mm || pagefault_disabled())
1084 goto no_context;
1085
1086 retry:
1087diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
1088index 331c1e2..e87972c 100644
1089--- a/arch/frv/mm/fault.c
1090+++ b/arch/frv/mm/fault.c
1091@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
1092 * If we're in an interrupt or have no user
1093 * context, we must not take the fault..
1094 */
1095- if (in_atomic() || !mm)
1096+ if (!mm || pagefault_disabled())
1097 goto no_context;
1098
1099 down_read(&mm->mmap_sem);
1100diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
1101index 6cf0341..dd88415 100644
1102--- a/arch/ia64/mm/fault.c
1103+++ b/arch/ia64/mm/fault.c
1104@@ -98,7 +98,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
1105 /*
1106 * If we're in an interrupt or have no user context, we must not take the fault..
1107 */
1108- if (in_atomic() || !mm)
1109+ if (!mm || pagefault_disabled())
1110 goto no_context;
1111
1112 #ifdef CONFIG_VIRTUAL_MEM_MAP
1113diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
1114index 3cdfa9c..6945056 100644
1115--- a/arch/m32r/mm/fault.c
1116+++ b/arch/m32r/mm/fault.c
1117@@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
1118 * If we're in an interrupt or have no user context or are running in an
1119 * atomic region then we must not take the fault..
1120 */
1121- if (in_atomic() || !mm)
1122+ if (!mm || pagefault_disabled())
1123 goto bad_area_nosemaphore;
1124
1125 /* When running in the kernel we expect faults to occur only to
1126diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
1127index a563727..9ea40db 100644
1128--- a/arch/m68k/mm/fault.c
1129+++ b/arch/m68k/mm/fault.c
1130@@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
1131 * If we're in an interrupt or have no user
1132 * context, we must not take the fault..
1133 */
1134- if (in_atomic() || !mm)
1135+ if (!mm || pagefault_disabled())
1136 goto no_context;
1137
1138 retry:
1139diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
1140index 731f739..81bb846 100644
1141--- a/arch/microblaze/mm/fault.c
1142+++ b/arch/microblaze/mm/fault.c
1143@@ -108,7 +108,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
1144 if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
1145 is_write = 0;
1146
1147- if (unlikely(in_atomic() || !mm)) {
1148+ if (unlikely(!mm || pagefault_disabled())) {
1149 if (kernel_mode(regs))
1150 goto bad_area_nosemaphore;
1151
1152diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
1153index 2ac626a..1c355a1 100644
1154--- a/arch/mips/Kconfig
1155+++ b/arch/mips/Kconfig
1156@@ -2102,7 +2102,7 @@ config CPU_R4400_WORKAROUNDS
1157 #
1158 config HIGHMEM
1159 bool "High Memory Support"
1160- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM
1161+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL
1162
1163 config CPU_SUPPORTS_HIGHMEM
1164 bool
1165diff --git a/arch/mips/kernel/early_printk.c b/arch/mips/kernel/early_printk.c
1166index 9ae813e..86d325a 100644
1167--- a/arch/mips/kernel/early_printk.c
1168+++ b/arch/mips/kernel/early_printk.c
1169@@ -8,6 +8,7 @@
1170 * written by Ralf Baechle (ralf@linux-mips.org)
1171 */
1172 #include <linux/console.h>
1173+#include <linux/printk.h>
1174 #include <linux/init.h>
1175
1176 #include <asm/setup.h>
1177@@ -25,20 +26,18 @@ early_console_write(struct console *con, const char *s, unsigned n)
1178 }
1179 }
1180
1181-static struct console early_console __initdata = {
1182+static struct console early_console_prom = {
1183 .name = "early",
1184 .write = early_console_write,
1185 .flags = CON_PRINTBUFFER | CON_BOOT,
1186 .index = -1
1187 };
1188
1189-static int early_console_initialized __initdata;
1190-
1191 void __init setup_early_printk(void)
1192 {
1193- if (early_console_initialized)
1194+ if (early_console)
1195 return;
1196- early_console_initialized = 1;
1197+ early_console = &early_console_prom;
1198
1199- register_console(&early_console);
1200+ register_console(&early_console_prom);
1201 }
1202diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
1203index b6aa770..bfcaea6 100644
1204--- a/arch/mips/kernel/signal.c
1205+++ b/arch/mips/kernel/signal.c
1206@@ -601,6 +601,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
1207 __u32 thread_info_flags)
1208 {
1209 local_irq_enable();
1210+ preempt_check_resched();
1211
1212 /* deal with pending signal delivery */
1213 if (thread_info_flags & _TIF_SIGPENDING)
1214diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
1215index ddcec1e..90d193a 100644
1216--- a/arch/mips/mm/fault.c
1217+++ b/arch/mips/mm/fault.c
1218@@ -89,7 +89,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ
1219 * If we're in an interrupt or have no user
1220 * context, we must not take the fault..
1221 */
1222- if (in_atomic() || !mm)
1223+ if (!mm || pagefault_disabled())
1224 goto bad_area_nosemaphore;
1225
1226 retry:
1227diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
1228index d48a84f..34a83b9 100644
1229--- a/arch/mn10300/mm/fault.c
1230+++ b/arch/mn10300/mm/fault.c
1231@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
1232 * If we're in an interrupt or have no user
1233 * context, we must not take the fault..
1234 */
1235- if (in_atomic() || !mm)
1236+ if (!mm || pagefault_disabled())
1237 goto no_context;
1238
1239 retry:
1240diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
1241index 18162ce..df22f39 100644
1242--- a/arch/parisc/mm/fault.c
1243+++ b/arch/parisc/mm/fault.c
1244@@ -176,7 +176,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
1245 unsigned long acc_type;
1246 int fault;
1247
1248- if (in_atomic() || !mm)
1249+ if (!mm || pagefault_disabled())
1250 goto no_context;
1251
1252 down_read(&mm->mmap_sem);
1253diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
1254index dabe429..281256e 100644
1255--- a/arch/powerpc/Kconfig
1256+++ b/arch/powerpc/Kconfig
1257@@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT
1258
1259 config RWSEM_GENERIC_SPINLOCK
1260 bool
1261+ default y if PREEMPT_RT_FULL
1262
1263 config RWSEM_XCHGADD_ALGORITHM
1264 bool
1265- default y
1266+ default y if !PREEMPT_RT_FULL
1267
1268 config GENERIC_LOCKBREAK
1269 bool
1270@@ -141,6 +142,7 @@ config PPC
1271 select GENERIC_CLOCKEVENTS
1272 select GENERIC_STRNCPY_FROM_USER
1273 select GENERIC_STRNLEN_USER
1274+ select HAVE_PREEMPT_LAZY
1275 select HAVE_MOD_ARCH_SPECIFIC
1276 select MODULES_USE_ELF_RELA
1277 select CLONE_BACKWARDS
1278@@ -290,7 +292,7 @@ menu "Kernel options"
1279
1280 config HIGHMEM
1281 bool "High memory support"
1282- depends on PPC32
1283+ depends on PPC32 && !PREEMPT_RT_FULL
1284
1285 source kernel/Kconfig.hz
1286 source kernel/Kconfig.preempt
1287diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
1288index 406b7b9..14c08f1 100644
1289--- a/arch/powerpc/include/asm/thread_info.h
1290+++ b/arch/powerpc/include/asm/thread_info.h
1291@@ -43,6 +43,8 @@ struct thread_info {
1292 int cpu; /* cpu we're on */
1293 int preempt_count; /* 0 => preemptable,
1294 <0 => BUG */
1295+ int preempt_lazy_count; /* 0 => preemptable,
1296+ <0 => BUG */
1297 struct restart_block restart_block;
1298 unsigned long local_flags; /* private flags for thread */
1299
1300@@ -97,7 +99,7 @@ static inline struct thread_info *current_thread_info(void)
1301 #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */
1302 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
1303 #define TIF_SINGLESTEP 8 /* singlestepping active */
1304-#define TIF_MEMDIE 9 /* is terminating due to OOM killer */
1305+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
1306 #define TIF_SECCOMP 10 /* secure computing */
1307 #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
1308 #define TIF_NOERROR 12 /* Force successful syscall return */
1309@@ -106,6 +108,7 @@ static inline struct thread_info *current_thread_info(void)
1310 #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
1311 #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
1312 for stack store? */
1313+#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
1314
1315 /* as above, but as bit values */
1316 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1317@@ -124,12 +127,15 @@ static inline struct thread_info *current_thread_info(void)
1318 #define _TIF_UPROBE (1<<TIF_UPROBE)
1319 #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
1320 #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
1321+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
1322 #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
1323 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
1324
1325 #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
1326- _TIF_NOTIFY_RESUME | _TIF_UPROBE)
1327+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
1328+ _TIF_NEED_RESCHED_LAZY)
1329 #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
1330+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
1331
1332 /* Bits in local_flags */
1333 /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
1334diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
1335index 4e23ba2..4c3e321 100644
1336--- a/arch/powerpc/kernel/asm-offsets.c
1337+++ b/arch/powerpc/kernel/asm-offsets.c
1338@@ -124,6 +124,7 @@ int main(void)
1339 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
1340 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
1341 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
1342+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
1343 DEFINE(TI_TASK, offsetof(struct thread_info, task));
1344 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
1345
1346diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
1347index e304bcd..90521e1 100644
1348--- a/arch/powerpc/kernel/entry_32.S
1349+++ b/arch/powerpc/kernel/entry_32.S
1350@@ -899,7 +899,14 @@ resume_kernel:
1351 cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
1352 bne restore
1353 andi. r8,r8,_TIF_NEED_RESCHED
1354+ bne+ 1f
1355+ lwz r0,TI_PREEMPT_LAZY(r9)
1356+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
1357+ bne restore
1358+ lwz r0,TI_FLAGS(r9)
1359+ andi. r0,r0,_TIF_NEED_RESCHED_LAZY
1360 beq+ restore
1361+1:
1362 lwz r3,_MSR(r1)
1363 andi. r0,r3,MSR_EE /* interrupts off? */
1364 beq restore /* don't schedule if so */
1365@@ -910,11 +917,11 @@ resume_kernel:
1366 */
1367 bl trace_hardirqs_off
1368 #endif
1369-1: bl preempt_schedule_irq
1370+2: bl preempt_schedule_irq
1371 CURRENT_THREAD_INFO(r9, r1)
1372 lwz r3,TI_FLAGS(r9)
1373- andi. r0,r3,_TIF_NEED_RESCHED
1374- bne- 1b
1375+ andi. r0,r3,_TIF_NEED_RESCHED_MASK
1376+ bne- 2b
1377 #ifdef CONFIG_TRACE_IRQFLAGS
1378 /* And now, to properly rebalance the above, we tell lockdep they
1379 * are being turned back on, which will happen when we return
1380@@ -1235,7 +1242,7 @@ global_dbcr0:
1381 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
1382
1383 do_work: /* r10 contains MSR_KERNEL here */
1384- andi. r0,r9,_TIF_NEED_RESCHED
1385+ andi. r0,r9,_TIF_NEED_RESCHED_MASK
1386 beq do_user_signal
1387
1388 do_resched: /* r10 contains MSR_KERNEL here */
1389@@ -1256,7 +1263,7 @@ recheck:
1390 MTMSRD(r10) /* disable interrupts */
1391 CURRENT_THREAD_INFO(r9, r1)
1392 lwz r9,TI_FLAGS(r9)
1393- andi. r0,r9,_TIF_NEED_RESCHED
1394+ andi. r0,r9,_TIF_NEED_RESCHED_MASK
1395 bne- do_resched
1396 andi. r0,r9,_TIF_USER_WORK_MASK
1397 beq restore_user
1398diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
1399index e0822a3..9f0ca17 100644
1400--- a/arch/powerpc/kernel/entry_64.S
1401+++ b/arch/powerpc/kernel/entry_64.S
1402@@ -592,7 +592,7 @@ _GLOBAL(ret_from_except_lite)
1403 andi. r0,r4,_TIF_USER_WORK_MASK
1404 beq restore
1405
1406- andi. r0,r4,_TIF_NEED_RESCHED
1407+ andi. r0,r4,_TIF_NEED_RESCHED_MASK
1408 beq 1f
1409 bl .restore_interrupts
1410 bl .schedule
1411@@ -642,10 +642,18 @@ resume_kernel:
1412
1413 #ifdef CONFIG_PREEMPT
1414 /* Check if we need to preempt */
1415+ lwz r8,TI_PREEMPT(r9)
1416+ cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
1417+ bne restore
1418 andi. r0,r4,_TIF_NEED_RESCHED
1419+ bne+ check_count
1420+
1421+ andi. r0,r4,_TIF_NEED_RESCHED_LAZY
1422 beq+ restore
1423+ lwz r8,TI_PREEMPT_LAZY(r9)
1424+
1425 /* Check that preempt_count() == 0 and interrupts are enabled */
1426- lwz r8,TI_PREEMPT(r9)
1427+check_count:
1428 cmpwi cr1,r8,0
1429 ld r0,SOFTE(r1)
1430 cmpdi r0,0
1431@@ -662,7 +670,7 @@ resume_kernel:
1432 /* Re-test flags and eventually loop */
1433 CURRENT_THREAD_INFO(r9, r1)
1434 ld r4,TI_FLAGS(r9)
1435- andi. r0,r4,_TIF_NEED_RESCHED
1436+ andi. r0,r4,_TIF_NEED_RESCHED_MASK
1437 bne 1b
1438
1439 /*
1440diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
1441index 71413f4..bb73a2e 100644
1442--- a/arch/powerpc/kernel/irq.c
1443+++ b/arch/powerpc/kernel/irq.c
1444@@ -584,6 +584,7 @@ void irq_ctx_init(void)
1445 }
1446 }
1447
1448+#ifndef CONFIG_PREEMPT_RT_FULL
1449 static inline void do_softirq_onstack(void)
1450 {
1451 struct thread_info *curtp, *irqtp;
1452@@ -620,6 +621,7 @@ void do_softirq(void)
1453
1454 local_irq_restore(flags);
1455 }
1456+#endif
1457
1458 irq_hw_number_t virq_to_hw(unsigned int virq)
1459 {
1460diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
1461index 19e096b..313078b 100644
1462--- a/arch/powerpc/kernel/misc_32.S
1463+++ b/arch/powerpc/kernel/misc_32.S
1464@@ -36,6 +36,7 @@
1465
1466 .text
1467
1468+#ifndef CONFIG_PREEMPT_RT_FULL
1469 _GLOBAL(call_do_softirq)
1470 mflr r0
1471 stw r0,4(r1)
1472@@ -46,6 +47,7 @@ _GLOBAL(call_do_softirq)
1473 lwz r0,4(r1)
1474 mtlr r0
1475 blr
1476+#endif
1477
1478 _GLOBAL(call_handle_irq)
1479 mflr r0
1480diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
1481index 5cfa800..39b77e8 100644
1482--- a/arch/powerpc/kernel/misc_64.S
1483+++ b/arch/powerpc/kernel/misc_64.S
1484@@ -29,6 +29,7 @@
1485
1486 .text
1487
1488+#ifndef CONFIG_PREEMPT_RT_FULL
1489 _GLOBAL(call_do_softirq)
1490 mflr r0
1491 std r0,16(r1)
1492@@ -39,6 +40,7 @@ _GLOBAL(call_do_softirq)
1493 ld r0,16(r1)
1494 mtlr r0
1495 blr
1496+#endif
1497
1498 _GLOBAL(call_handle_irq)
1499 ld r8,0(r6)
1500diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
1501index f974849..13b8670 100644
1502--- a/arch/powerpc/kernel/udbg.c
1503+++ b/arch/powerpc/kernel/udbg.c
1504@@ -156,15 +156,13 @@ static struct console udbg_console = {
1505 .index = 0,
1506 };
1507
1508-static int early_console_initialized;
1509-
1510 /*
1511 * Called by setup_system after ppc_md->probe and ppc_md->early_init.
1512 * Call it again after setting udbg_putc in ppc_md->setup_arch.
1513 */
1514 void __init register_early_udbg_console(void)
1515 {
1516- if (early_console_initialized)
1517+ if (early_console)
1518 return;
1519
1520 if (!udbg_putc)
1521@@ -174,7 +172,7 @@ void __init register_early_udbg_console(void)
1522 printk(KERN_INFO "early console immortal !\n");
1523 udbg_console.flags &= ~CON_BOOT;
1524 }
1525- early_console_initialized = 1;
1526+ early_console = &udbg_console;
1527 register_console(&udbg_console);
1528 }
1529
1530diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
1531index 3a8489a..66fdd82 100644
1532--- a/arch/powerpc/mm/fault.c
1533+++ b/arch/powerpc/mm/fault.c
1534@@ -259,7 +259,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
1535 if (!arch_irq_disabled_regs(regs))
1536 local_irq_enable();
1537
1538- if (in_atomic() || mm == NULL) {
1539+ if (!mm || pagefault_disabled()) {
1540 if (!user_mode(regs))
1541 return SIGSEGV;
1542 /* in_atomic() in user mode is really bad,
1543diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
1544index 1e12108..806cbbd 100644
1545--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
1546+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
1547@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
1548
1549 static struct irqaction tbint_irqaction = {
1550 .handler = timebase_interrupt,
1551+ .flags = IRQF_NO_THREAD,
1552 .name = "tbint",
1553 };
1554
1555diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
1556index d4fa03f..5e6ff38 100644
1557--- a/arch/powerpc/sysdev/cpm1.c
1558+++ b/arch/powerpc/sysdev/cpm1.c
1559@@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev)
1560
1561 static struct irqaction cpm_error_irqaction = {
1562 .handler = cpm_error_interrupt,
1563+ .flags = IRQF_NO_THREAD,
1564 .name = "error",
1565 };
1566
1567diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
1568index 6e53d97..b94b478 100644
1569--- a/arch/powerpc/sysdev/fsl_msi.c
1570+++ b/arch/powerpc/sysdev/fsl_msi.c
1571@@ -333,6 +333,8 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
1572 return 0;
1573 }
1574
1575+static struct lock_class_key fsl_msi_irq_class;
1576+
1577 static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
1578 int offset, int irq_index)
1579 {
1580@@ -351,7 +353,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
1581 dev_err(&dev->dev, "No memory for MSI cascade data\n");
1582 return -ENOMEM;
1583 }
1584-
1585+ irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
1586 msi->msi_virqs[irq_index] = virt_msir;
1587 cascade_data->index = offset;
1588 cascade_data->msi_data = msi;
1589diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
1590index 2fb9e63..62d659d 100644
1591--- a/arch/s390/mm/fault.c
1592+++ b/arch/s390/mm/fault.c
1593@@ -296,7 +296,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
1594 * user context.
1595 */
1596 fault = VM_FAULT_BADCONTEXT;
1597- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
1598+ if (unlikely(!user_space_fault(trans_exc_code) ||
1599+ !mm || pagefault_disabled()))
1600 goto out;
1601
1602 address = trans_exc_code & __FAIL_ADDR_MASK;
1603@@ -435,7 +436,8 @@ void __kprobes do_asce_exception(struct pt_regs *regs)
1604 clear_tsk_thread_flag(current, TIF_PER_TRAP);
1605
1606 trans_exc_code = regs->int_parm_long;
1607- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
1608+ if (unlikely(!user_space_fault(trans_exc_code) || !mm ||
1609+ pagefault_disabled()))
1610 goto no_context;
1611
1612 down_read(&mm->mmap_sem);
1613diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
1614index 47b600e..59fccbe 100644
1615--- a/arch/score/mm/fault.c
1616+++ b/arch/score/mm/fault.c
1617@@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
1618 * If we're in an interrupt or have no user
1619 * context, we must not take the fault..
1620 */
1621- if (in_atomic() || !mm)
1622+ if (!mm || pagefault_disabled())
1623 goto bad_area_nosemaphore;
1624
1625 down_read(&mm->mmap_sem);
1626diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
1627index 063af10..ae4b141 100644
1628--- a/arch/sh/kernel/irq.c
1629+++ b/arch/sh/kernel/irq.c
1630@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
1631 hardirq_ctx[cpu] = NULL;
1632 }
1633
1634+#ifndef CONFIG_PREEMPT_RT_FULL
1635 asmlinkage void do_softirq(void)
1636 {
1637 unsigned long flags;
1638@@ -191,6 +192,7 @@ asmlinkage void do_softirq(void)
1639
1640 local_irq_restore(flags);
1641 }
1642+#endif
1643 #else
1644 static inline void handle_one_irq(unsigned int irq)
1645 {
1646diff --git a/arch/sh/kernel/sh_bios.c b/arch/sh/kernel/sh_bios.c
1647index 47475cc..a5b51b9 100644
1648--- a/arch/sh/kernel/sh_bios.c
1649+++ b/arch/sh/kernel/sh_bios.c
1650@@ -144,8 +144,6 @@ static struct console bios_console = {
1651 .index = -1,
1652 };
1653
1654-static struct console *early_console;
1655-
1656 static int __init setup_early_printk(char *buf)
1657 {
1658 int keep_early = 0;
1659diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
1660index 1f49c28..8ff1613 100644
1661--- a/arch/sh/mm/fault.c
1662+++ b/arch/sh/mm/fault.c
1663@@ -440,7 +440,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
1664 * If we're in an interrupt, have no user context or are running
1665 * in an atomic region then we must not take the fault:
1666 */
1667- if (unlikely(in_atomic() || !mm)) {
1668+ if (unlikely(!mm || pagefault_disabled())) {
1669 bad_area_nosemaphore(regs, error_code, address);
1670 return;
1671 }
1672diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
1673index 9bcbbe2..1bc5cd8 100644
1674--- a/arch/sparc/kernel/irq_64.c
1675+++ b/arch/sparc/kernel/irq_64.c
1676@@ -698,6 +698,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
1677 set_irq_regs(old_regs);
1678 }
1679
1680+#ifndef CONFIG_PREEMPT_RT_FULL
1681 void do_softirq(void)
1682 {
1683 unsigned long flags;
1684@@ -723,6 +724,7 @@ void do_softirq(void)
1685
1686 local_irq_restore(flags);
1687 }
1688+#endif
1689
1690 #ifdef CONFIG_HOTPLUG_CPU
1691 void fixup_irqs(void)
1692diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
1693index 1303021..9f20566 100644
1694--- a/arch/sparc/kernel/prom_common.c
1695+++ b/arch/sparc/kernel/prom_common.c
1696@@ -64,7 +64,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
1697 err = -ENODEV;
1698
1699 mutex_lock(&of_set_property_mutex);
1700- write_lock(&devtree_lock);
1701+ raw_spin_lock(&devtree_lock);
1702 prevp = &dp->properties;
1703 while (*prevp) {
1704 struct property *prop = *prevp;
1705@@ -91,7 +91,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
1706 }
1707 prevp = &(*prevp)->next;
1708 }
1709- write_unlock(&devtree_lock);
1710+ raw_spin_unlock(&devtree_lock);
1711 mutex_unlock(&of_set_property_mutex);
1712
1713 /* XXX Upate procfs if necessary... */
1714diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
1715index 38bf80a..f4fb00e 100644
1716--- a/arch/sparc/kernel/setup_32.c
1717+++ b/arch/sparc/kernel/setup_32.c
1718@@ -309,6 +309,7 @@ void __init setup_arch(char **cmdline_p)
1719
1720 boot_flags_init(*cmdline_p);
1721
1722+ early_console = &prom_early_console;
1723 register_console(&prom_early_console);
1724
1725 printk("ARCH: ");
1726diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
1727index 0eaf005..ede7dc3 100644
1728--- a/arch/sparc/kernel/setup_64.c
1729+++ b/arch/sparc/kernel/setup_64.c
1730@@ -551,6 +551,12 @@ static void __init init_sparc64_elf_hwcap(void)
1731 pause_patch();
1732 }
1733
1734+static inline void register_prom_console(void)
1735+{
1736+ early_console = &prom_early_console;
1737+ register_console(&prom_early_console);
1738+}
1739+
1740 void __init setup_arch(char **cmdline_p)
1741 {
1742 /* Initialize PROM console and command line. */
1743@@ -562,7 +568,7 @@ void __init setup_arch(char **cmdline_p)
1744 #ifdef CONFIG_EARLYFB
1745 if (btext_find_display())
1746 #endif
1747- register_console(&prom_early_console);
1748+ register_prom_console();
1749
1750 if (tlb_type == hypervisor)
1751 printk("ARCH: SUN4V\n");
1752diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
1753index e98bfda..18cbe13 100644
1754--- a/arch/sparc/mm/fault_32.c
1755+++ b/arch/sparc/mm/fault_32.c
1756@@ -200,7 +200,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
1757 * If we're in an interrupt or have no user
1758 * context, we must not take the fault..
1759 */
1760- if (in_atomic() || !mm)
1761+ if (!mm || pagefault_disabled())
1762 goto no_context;
1763
1764 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1765diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
1766index 5062ff3..2764ac6 100644
1767--- a/arch/sparc/mm/fault_64.c
1768+++ b/arch/sparc/mm/fault_64.c
1769@@ -321,7 +321,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
1770 * If we're in an interrupt or have no user
1771 * context, we must not take the fault..
1772 */
1773- if (in_atomic() || !mm)
1774+ if (!mm || pagefault_disabled())
1775 goto intr_or_no_mm;
1776
1777 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1778diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
1779index afb9c9a..34d72a1 100644
1780--- a/arch/tile/kernel/early_printk.c
1781+++ b/arch/tile/kernel/early_printk.c
1782@@ -17,6 +17,7 @@
1783 #include <linux/init.h>
1784 #include <linux/string.h>
1785 #include <linux/irqflags.h>
1786+#include <linux/printk.h>
1787 #include <asm/setup.h>
1788 #include <hv/hypervisor.h>
1789
1790@@ -33,25 +34,8 @@ static struct console early_hv_console = {
1791 };
1792
1793 /* Direct interface for emergencies */
1794-static struct console *early_console = &early_hv_console;
1795-static int early_console_initialized;
1796 static int early_console_complete;
1797
1798-static void early_vprintk(const char *fmt, va_list ap)
1799-{
1800- char buf[512];
1801- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
1802- early_console->write(early_console, buf, n);
1803-}
1804-
1805-void early_printk(const char *fmt, ...)
1806-{
1807- va_list ap;
1808- va_start(ap, fmt);
1809- early_vprintk(fmt, ap);
1810- va_end(ap);
1811-}
1812-
1813 void early_panic(const char *fmt, ...)
1814 {
1815 va_list ap;
1816@@ -69,14 +53,13 @@ static int __initdata keep_early;
1817
1818 static int __init setup_early_printk(char *str)
1819 {
1820- if (early_console_initialized)
1821+ if (early_console)
1822 return 1;
1823
1824 if (str != NULL && strncmp(str, "keep", 4) == 0)
1825 keep_early = 1;
1826
1827 early_console = &early_hv_console;
1828- early_console_initialized = 1;
1829 register_console(early_console);
1830
1831 return 0;
1832@@ -85,12 +68,12 @@ static int __init setup_early_printk(char *str)
1833 void __init disable_early_printk(void)
1834 {
1835 early_console_complete = 1;
1836- if (!early_console_initialized || !early_console)
1837+ if (!early_console)
1838 return;
1839 if (!keep_early) {
1840 early_printk("disabling early console\n");
1841 unregister_console(early_console);
1842- early_console_initialized = 0;
1843+ early_console = NULL;
1844 } else {
1845 early_printk("keeping early console\n");
1846 }
1847@@ -98,7 +81,7 @@ void __init disable_early_printk(void)
1848
1849 void warn_early_printk(void)
1850 {
1851- if (early_console_complete || early_console_initialized)
1852+ if (early_console_complete || early_console)
1853 return;
1854 early_printk("\
1855 Machine shutting down before console output is fully initialized.\n\
1856diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
1857index 3d2b81c..1ba0ccc 100644
1858--- a/arch/tile/mm/fault.c
1859+++ b/arch/tile/mm/fault.c
1860@@ -360,7 +360,7 @@ static int handle_page_fault(struct pt_regs *regs,
1861 * If we're in an interrupt, have no user context or are running in an
1862 * atomic region then we must not take the fault.
1863 */
1864- if (in_atomic() || !mm) {
1865+ if (!mm || pagefault_disabled()) {
1866 vma = NULL; /* happy compiler */
1867 goto bad_area_nosemaphore;
1868 }
1869diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
1870index 49480f0..4a0800b 100644
1871--- a/arch/um/kernel/early_printk.c
1872+++ b/arch/um/kernel/early_printk.c
1873@@ -16,7 +16,7 @@ static void early_console_write(struct console *con, const char *s, unsigned int
1874 um_early_printk(s, n);
1875 }
1876
1877-static struct console early_console = {
1878+static struct console early_console_dev = {
1879 .name = "earlycon",
1880 .write = early_console_write,
1881 .flags = CON_BOOT,
1882@@ -25,8 +25,10 @@ static struct console early_console = {
1883
1884 static int __init setup_early_printk(char *buf)
1885 {
1886- register_console(&early_console);
1887-
1888+ if (!early_console) {
1889+ early_console = &early_console_dev;
1890+ register_console(&early_console_dev);
1891+ }
1892 return 0;
1893 }
1894
1895diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
1896index 089f398..991b33a 100644
1897--- a/arch/um/kernel/trap.c
1898+++ b/arch/um/kernel/trap.c
1899@@ -39,7 +39,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
1900 * If the fault was during atomic operation, don't take the fault, just
1901 * fail.
1902 */
1903- if (in_atomic())
1904+ if (pagefault_disabled())
1905 goto out_nosemaphore;
1906
1907 retry:
1908diff --git a/arch/unicore32/kernel/early_printk.c b/arch/unicore32/kernel/early_printk.c
1909index 3922255..9be0d5d 100644
1910--- a/arch/unicore32/kernel/early_printk.c
1911+++ b/arch/unicore32/kernel/early_printk.c
1912@@ -33,21 +33,17 @@ static struct console early_ocd_console = {
1913 .index = -1,
1914 };
1915
1916-/* Direct interface for emergencies */
1917-static struct console *early_console = &early_ocd_console;
1918-
1919-static int __initdata keep_early;
1920-
1921 static int __init setup_early_printk(char *buf)
1922 {
1923- if (!buf)
1924+ int keep_early;
1925+
1926+ if (!buf || early_console)
1927 return 0;
1928
1929 if (strstr(buf, "keep"))
1930 keep_early = 1;
1931
1932- if (!strncmp(buf, "ocd", 3))
1933- early_console = &early_ocd_console;
1934+ early_console = &early_ocd_console;
1935
1936 if (keep_early)
1937 early_console->flags &= ~CON_BOOT;
1938diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
1939index 0694d09..585e236 100644
1940--- a/arch/x86/Kconfig
1941+++ b/arch/x86/Kconfig
1942@@ -108,6 +108,7 @@ config X86
1943 select KTIME_SCALAR if X86_32
1944 select GENERIC_STRNCPY_FROM_USER
1945 select GENERIC_STRNLEN_USER
1946+ select HAVE_PREEMPT_LAZY
1947 select HAVE_CONTEXT_TRACKING if X86_64
1948 select HAVE_IRQ_TIME_ACCOUNTING
1949 select MODULES_USE_ELF_REL if X86_32
1950@@ -173,8 +174,11 @@ config ARCH_MAY_HAVE_PC_FDC
1951 def_bool y
1952 depends on ISA_DMA_API
1953
1954+config RWSEM_GENERIC_SPINLOCK
1955+ def_bool PREEMPT_RT_FULL
1956+
1957 config RWSEM_XCHGADD_ALGORITHM
1958- def_bool y
1959+ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
1960
1961 config GENERIC_CALIBRATE_DELAY
1962 def_bool y
1963@@ -772,7 +776,7 @@ config IOMMU_HELPER
1964 config MAXSMP
1965 bool "Enable Maximum number of SMP Processors and NUMA Nodes"
1966 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
1967- select CPUMASK_OFFSTACK
1968+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
1969 ---help---
1970 Enable maximum number of CPUS and NUMA Nodes for this architecture.
1971 If unsure, say N.
1972diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
1973index 1b9c22b..653314e 100644
1974--- a/arch/x86/crypto/aesni-intel_glue.c
1975+++ b/arch/x86/crypto/aesni-intel_glue.c
1976@@ -250,14 +250,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
1977 err = blkcipher_walk_virt(desc, &walk);
1978 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1979
1980- kernel_fpu_begin();
1981 while ((nbytes = walk.nbytes)) {
1982+ kernel_fpu_begin();
1983 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1984- nbytes & AES_BLOCK_MASK);
1985+ nbytes & AES_BLOCK_MASK);
1986+ kernel_fpu_end();
1987 nbytes &= AES_BLOCK_SIZE - 1;
1988 err = blkcipher_walk_done(desc, &walk, nbytes);
1989 }
1990- kernel_fpu_end();
1991
1992 return err;
1993 }
1994@@ -274,14 +274,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
1995 err = blkcipher_walk_virt(desc, &walk);
1996 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1997
1998- kernel_fpu_begin();
1999 while ((nbytes = walk.nbytes)) {
2000+ kernel_fpu_begin();
2001 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
2002 nbytes & AES_BLOCK_MASK);
2003+ kernel_fpu_end();
2004 nbytes &= AES_BLOCK_SIZE - 1;
2005 err = blkcipher_walk_done(desc, &walk, nbytes);
2006 }
2007- kernel_fpu_end();
2008
2009 return err;
2010 }
2011@@ -298,14 +298,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
2012 err = blkcipher_walk_virt(desc, &walk);
2013 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
2014
2015- kernel_fpu_begin();
2016 while ((nbytes = walk.nbytes)) {
2017+ kernel_fpu_begin();
2018 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
2019 nbytes & AES_BLOCK_MASK, walk.iv);
2020+ kernel_fpu_end();
2021 nbytes &= AES_BLOCK_SIZE - 1;
2022 err = blkcipher_walk_done(desc, &walk, nbytes);
2023 }
2024- kernel_fpu_end();
2025
2026 return err;
2027 }
2028@@ -322,14 +322,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
2029 err = blkcipher_walk_virt(desc, &walk);
2030 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
2031
2032- kernel_fpu_begin();
2033 while ((nbytes = walk.nbytes)) {
2034+ kernel_fpu_begin();
2035 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
2036 nbytes & AES_BLOCK_MASK, walk.iv);
2037+ kernel_fpu_end();
2038 nbytes &= AES_BLOCK_SIZE - 1;
2039 err = blkcipher_walk_done(desc, &walk, nbytes);
2040 }
2041- kernel_fpu_end();
2042
2043 return err;
2044 }
2045@@ -362,18 +362,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
2046 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
2047 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
2048
2049- kernel_fpu_begin();
2050 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
2051+ kernel_fpu_begin();
2052 aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
2053 nbytes & AES_BLOCK_MASK, walk.iv);
2054+ kernel_fpu_end();
2055 nbytes &= AES_BLOCK_SIZE - 1;
2056 err = blkcipher_walk_done(desc, &walk, nbytes);
2057 }
2058 if (walk.nbytes) {
2059+ kernel_fpu_begin();
2060 ctr_crypt_final(ctx, &walk);
2061+ kernel_fpu_end();
2062 err = blkcipher_walk_done(desc, &walk, 0);
2063 }
2064- kernel_fpu_end();
2065
2066 return err;
2067 }
2068diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
2069index 0c44630..1785dd7 100644
2070--- a/arch/x86/include/asm/acpi.h
2071+++ b/arch/x86/include/asm/acpi.h
2072@@ -51,8 +51,8 @@
2073
2074 #define ACPI_ASM_MACROS
2075 #define BREAKPOINT3
2076-#define ACPI_DISABLE_IRQS() local_irq_disable()
2077-#define ACPI_ENABLE_IRQS() local_irq_enable()
2078+#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
2079+#define ACPI_ENABLE_IRQS() local_irq_enable_nort()
2080 #define ACPI_FLUSH_CPU_CACHE() wbinvd()
2081
2082 int __acpi_acquire_global_lock(unsigned int *lock);
2083diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
2084index 320f7bb..65b85f4 100644
2085--- a/arch/x86/include/asm/page_64_types.h
2086+++ b/arch/x86/include/asm/page_64_types.h
2087@@ -14,12 +14,21 @@
2088 #define IRQ_STACK_ORDER 2
2089 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
2090
2091-#define STACKFAULT_STACK 1
2092-#define DOUBLEFAULT_STACK 2
2093-#define NMI_STACK 3
2094-#define DEBUG_STACK 4
2095-#define MCE_STACK 5
2096-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
2097+#ifdef CONFIG_PREEMPT_RT_FULL
2098+# define STACKFAULT_STACK 0
2099+# define DOUBLEFAULT_STACK 1
2100+# define NMI_STACK 2
2101+# define DEBUG_STACK 0
2102+# define MCE_STACK 3
2103+# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */
2104+#else
2105+# define STACKFAULT_STACK 1
2106+# define DOUBLEFAULT_STACK 2
2107+# define NMI_STACK 3
2108+# define DEBUG_STACK 4
2109+# define MCE_STACK 5
2110+# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
2111+#endif
2112
2113 #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
2114 #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
2115diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
2116index 111d272..aa4f8be 100644
2117--- a/arch/x86/include/asm/signal.h
2118+++ b/arch/x86/include/asm/signal.h
2119@@ -23,6 +23,19 @@ typedef struct {
2120 unsigned long sig[_NSIG_WORDS];
2121 } sigset_t;
2122
2123+/*
2124+ * Because some traps use the IST stack, we must keep preemption
2125+ * disabled while calling do_trap(), but do_trap() may call
2126+ * force_sig_info() which will grab the signal spin_locks for the
2127+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
2128+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
2129+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
2130+ * trap.
2131+ */
2132+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
2133+#define ARCH_RT_DELAYS_SIGNAL_SEND
2134+#endif
2135+
2136 #ifndef CONFIG_COMPAT
2137 typedef sigset_t compat_sigset_t;
2138 #endif
2139diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
2140index 6a99859..64fb5cb 100644
2141--- a/arch/x86/include/asm/stackprotector.h
2142+++ b/arch/x86/include/asm/stackprotector.h
2143@@ -57,7 +57,7 @@
2144 */
2145 static __always_inline void boot_init_stack_canary(void)
2146 {
2147- u64 canary;
2148+ u64 uninitialized_var(canary);
2149 u64 tsc;
2150
2151 #ifdef CONFIG_X86_64
2152@@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void)
2153 * of randomness. The TSC only matters for very early init,
2154 * there it already has some randomness on most systems. Later
2155 * on during the bootup the random pool has true entropy too.
2156+ *
2157+ * For preempt-rt we need to weaken the randomness a bit, as
2158+ * we can't call into the random generator from atomic context
2159+ * due to locking constraints. We just leave canary
2160+ * uninitialized and use the TSC based randomness on top of
2161+ * it.
2162 */
2163+#ifndef CONFIG_PREEMPT_RT_FULL
2164 get_random_bytes(&canary, sizeof(canary));
2165+#endif
2166 tsc = __native_read_tsc();
2167 canary += tsc + (tsc << 32UL);
2168
2169diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
2170index 2d946e6..6b0fc2e 100644
2171--- a/arch/x86/include/asm/thread_info.h
2172+++ b/arch/x86/include/asm/thread_info.h
2173@@ -31,6 +31,8 @@ struct thread_info {
2174 __u32 cpu; /* current CPU */
2175 int preempt_count; /* 0 => preemptable,
2176 <0 => BUG */
2177+ int preempt_lazy_count; /* 0 => lazy preemptable,
2178+ <0 => BUG */
2179 mm_segment_t addr_limit;
2180 struct restart_block restart_block;
2181 void __user *sysenter_return;
2182@@ -82,6 +84,7 @@ struct thread_info {
2183 #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
2184 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
2185 #define TIF_SECCOMP 8 /* secure computing */
2186+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
2187 #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
2188 #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
2189 #define TIF_UPROBE 12 /* breakpointed or singlestepping */
2190@@ -107,6 +110,7 @@ struct thread_info {
2191 #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
2192 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
2193 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
2194+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
2195 #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
2196 #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
2197 #define _TIF_UPROBE (1 << TIF_UPROBE)
2198@@ -157,6 +161,8 @@ struct thread_info {
2199 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
2200 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
2201
2202+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
2203+
2204 #define PREEMPT_ACTIVE 0x10000000
2205
2206 #ifdef CONFIG_X86_32
2207diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
2208index b739d39..aaa6399 100644
2209--- a/arch/x86/kernel/apic/io_apic.c
2210+++ b/arch/x86/kernel/apic/io_apic.c
2211@@ -2428,7 +2428,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
2212 static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2213 {
2214 /* If we are moving the irq we need to mask it */
2215- if (unlikely(irqd_is_setaffinity_pending(data))) {
2216+ if (unlikely(irqd_is_setaffinity_pending(data) &&
2217+ !irqd_irq_inprogress(data))) {
2218 mask_ioapic(cfg);
2219 return true;
2220 }
2221diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
2222index 2861082..a36d9cf 100644
2223--- a/arch/x86/kernel/asm-offsets.c
2224+++ b/arch/x86/kernel/asm-offsets.c
2225@@ -33,6 +33,7 @@ void common(void) {
2226 OFFSET(TI_status, thread_info, status);
2227 OFFSET(TI_addr_limit, thread_info, addr_limit);
2228 OFFSET(TI_preempt_count, thread_info, preempt_count);
2229+ OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
2230
2231 BLANK();
2232 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
2233diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2234index 9c3ab43..2636e0f 100644
2235--- a/arch/x86/kernel/cpu/common.c
2236+++ b/arch/x86/kernel/cpu/common.c
2237@@ -1103,7 +1103,9 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
2238 */
2239 static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
2240 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
2241+#if DEBUG_STACK > 0
2242 [DEBUG_STACK - 1] = DEBUG_STKSZ
2243+#endif
2244 };
2245
2246 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
2247diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
2248index 80dbda8..332e133 100644
2249--- a/arch/x86/kernel/cpu/mcheck/mce.c
2250+++ b/arch/x86/kernel/cpu/mcheck/mce.c
2251@@ -18,6 +18,7 @@
2252 #include <linux/rcupdate.h>
2253 #include <linux/kobject.h>
2254 #include <linux/uaccess.h>
2255+#include <linux/kthread.h>
2256 #include <linux/kdebug.h>
2257 #include <linux/kernel.h>
2258 #include <linux/percpu.h>
2259@@ -41,6 +42,7 @@
2260 #include <linux/debugfs.h>
2261 #include <linux/irq_work.h>
2262 #include <linux/export.h>
2263+#include <linux/jiffies.h>
2264
2265 #include <asm/processor.h>
2266 #include <asm/mce.h>
2267@@ -1259,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status)
2268 static unsigned long check_interval = 5 * 60; /* 5 minutes */
2269
2270 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
2271-static DEFINE_PER_CPU(struct timer_list, mce_timer);
2272+static DEFINE_PER_CPU(struct hrtimer, mce_timer);
2273
2274 static unsigned long mce_adjust_timer_default(unsigned long interval)
2275 {
2276@@ -1269,13 +1271,10 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
2277 static unsigned long (*mce_adjust_timer)(unsigned long interval) =
2278 mce_adjust_timer_default;
2279
2280-static void mce_timer_fn(unsigned long data)
2281+static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
2282 {
2283- struct timer_list *t = &__get_cpu_var(mce_timer);
2284 unsigned long iv;
2285
2286- WARN_ON(smp_processor_id() != data);
2287-
2288 if (mce_available(__this_cpu_ptr(&cpu_info))) {
2289 machine_check_poll(MCP_TIMESTAMP,
2290 &__get_cpu_var(mce_poll_banks));
2291@@ -1296,9 +1295,10 @@ static void mce_timer_fn(unsigned long data)
2292 __this_cpu_write(mce_next_interval, iv);
2293 /* Might have become 0 after CMCI storm subsided */
2294 if (iv) {
2295- t->expires = jiffies + iv;
2296- add_timer_on(t, smp_processor_id());
2297+ hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_usecs(iv)));
2298+ return HRTIMER_RESTART;
2299 }
2300+ return HRTIMER_NORESTART;
2301 }
2302
2303 /*
2304@@ -1306,28 +1306,37 @@ static void mce_timer_fn(unsigned long data)
2305 */
2306 void mce_timer_kick(unsigned long interval)
2307 {
2308- struct timer_list *t = &__get_cpu_var(mce_timer);
2309- unsigned long when = jiffies + interval;
2310+ struct hrtimer *t = &__get_cpu_var(mce_timer);
2311 unsigned long iv = __this_cpu_read(mce_next_interval);
2312
2313- if (timer_pending(t)) {
2314- if (time_before(when, t->expires))
2315- mod_timer_pinned(t, when);
2316+ if (hrtimer_active(t)) {
2317+ s64 exp;
2318+ s64 intv_us;
2319+
2320+ intv_us = jiffies_to_usecs(interval);
2321+ exp = ktime_to_us(hrtimer_expires_remaining(t));
2322+ if (intv_us < exp) {
2323+ hrtimer_cancel(t);
2324+ hrtimer_start_range_ns(t,
2325+ ns_to_ktime(intv_us * 1000),
2326+ 0, HRTIMER_MODE_REL_PINNED);
2327+ }
2328 } else {
2329- t->expires = round_jiffies(when);
2330- add_timer_on(t, smp_processor_id());
2331+ hrtimer_start_range_ns(t,
2332+ ns_to_ktime(jiffies_to_usecs(interval) * 1000),
2333+ 0, HRTIMER_MODE_REL_PINNED);
2334 }
2335 if (interval < iv)
2336 __this_cpu_write(mce_next_interval, interval);
2337 }
2338
2339-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
2340+/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
2341 static void mce_timer_delete_all(void)
2342 {
2343 int cpu;
2344
2345 for_each_online_cpu(cpu)
2346- del_timer_sync(&per_cpu(mce_timer, cpu));
2347+ hrtimer_cancel(&per_cpu(mce_timer, cpu));
2348 }
2349
2350 static void mce_do_trigger(struct work_struct *work)
2351@@ -1337,6 +1346,63 @@ static void mce_do_trigger(struct work_struct *work)
2352
2353 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
2354
2355+static void __mce_notify_work(void)
2356+{
2357+ /* Not more than two messages every minute */
2358+ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2359+
2360+ /* wake processes polling /dev/mcelog */
2361+ wake_up_interruptible(&mce_chrdev_wait);
2362+
2363+ /*
2364+ * There is no risk of missing notifications because
2365+ * work_pending is always cleared before the function is
2366+ * executed.
2367+ */
2368+ if (mce_helper[0] && !work_pending(&mce_trigger_work))
2369+ schedule_work(&mce_trigger_work);
2370+
2371+ if (__ratelimit(&ratelimit))
2372+ pr_info(HW_ERR "Machine check events logged\n");
2373+}
2374+
2375+#ifdef CONFIG_PREEMPT_RT_FULL
2376+struct task_struct *mce_notify_helper;
2377+
2378+static int mce_notify_helper_thread(void *unused)
2379+{
2380+ while (1) {
2381+ set_current_state(TASK_INTERRUPTIBLE);
2382+ schedule();
2383+ if (kthread_should_stop())
2384+ break;
2385+ __mce_notify_work();
2386+ }
2387+ return 0;
2388+}
2389+
2390+static int mce_notify_work_init(void)
2391+{
2392+ mce_notify_helper = kthread_run(mce_notify_helper_thread, NULL,
2393+ "mce-notify");
2394+ if (!mce_notify_helper)
2395+ return -ENOMEM;
2396+
2397+ return 0;
2398+}
2399+
2400+static void mce_notify_work(void)
2401+{
2402+ wake_up_process(mce_notify_helper);
2403+}
2404+#else
2405+static void mce_notify_work(void)
2406+{
2407+ __mce_notify_work();
2408+}
2409+static inline int mce_notify_work_init(void) { return 0; }
2410+#endif
2411+
2412 /*
2413 * Notify the user(s) about new machine check events.
2414 * Can be called from interrupt context, but not from machine check/NMI
2415@@ -1344,24 +1410,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
2416 */
2417 int mce_notify_irq(void)
2418 {
2419- /* Not more than two messages every minute */
2420- static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2421-
2422 if (test_and_clear_bit(0, &mce_need_notify)) {
2423- /* wake processes polling /dev/mcelog */
2424- wake_up_interruptible(&mce_chrdev_wait);
2425-
2426- /*
2427- * There is no risk of missing notifications because
2428- * work_pending is always cleared before the function is
2429- * executed.
2430- */
2431- if (mce_helper[0] && !work_pending(&mce_trigger_work))
2432- schedule_work(&mce_trigger_work);
2433-
2434- if (__ratelimit(&ratelimit))
2435- pr_info(HW_ERR "Machine check events logged\n");
2436-
2437+ mce_notify_work();
2438 return 1;
2439 }
2440 return 0;
2441@@ -1632,7 +1682,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
2442 }
2443 }
2444
2445-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
2446+static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
2447 {
2448 unsigned long iv = mce_adjust_timer(check_interval * HZ);
2449
2450@@ -1641,16 +1691,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
2451 if (mca_cfg.ignore_ce || !iv)
2452 return;
2453
2454- t->expires = round_jiffies(jiffies + iv);
2455- add_timer_on(t, smp_processor_id());
2456+ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000),
2457+ 0, HRTIMER_MODE_REL_PINNED);
2458 }
2459
2460 static void __mcheck_cpu_init_timer(void)
2461 {
2462- struct timer_list *t = &__get_cpu_var(mce_timer);
2463+ struct hrtimer *t = &__get_cpu_var(mce_timer);
2464 unsigned int cpu = smp_processor_id();
2465
2466- setup_timer(t, mce_timer_fn, cpu);
2467+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2468+ t->function = mce_timer_fn;
2469 mce_start_timer(cpu, t);
2470 }
2471
2472@@ -2307,6 +2358,8 @@ static void __cpuinit mce_disable_cpu(void *h)
2473 if (!mce_available(__this_cpu_ptr(&cpu_info)))
2474 return;
2475
2476+ hrtimer_cancel(&__get_cpu_var(mce_timer));
2477+
2478 if (!(action & CPU_TASKS_FROZEN))
2479 cmci_clear();
2480 for (i = 0; i < mca_cfg.banks; i++) {
2481@@ -2333,6 +2386,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
2482 if (b->init)
2483 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
2484 }
2485+ __mcheck_cpu_init_timer();
2486 }
2487
2488 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
2489@@ -2340,7 +2394,6 @@ static int __cpuinit
2490 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2491 {
2492 unsigned int cpu = (unsigned long)hcpu;
2493- struct timer_list *t = &per_cpu(mce_timer, cpu);
2494
2495 switch (action & ~CPU_TASKS_FROZEN) {
2496 case CPU_ONLINE:
2497@@ -2356,11 +2409,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2498 break;
2499 case CPU_DOWN_PREPARE:
2500 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2501- del_timer_sync(t);
2502 break;
2503 case CPU_DOWN_FAILED:
2504 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2505- mce_start_timer(cpu, t);
2506 break;
2507 }
2508
2509@@ -2422,6 +2473,8 @@ static __init int mcheck_init_device(void)
2510 /* register character device /dev/mcelog */
2511 misc_register(&mce_chrdev_device);
2512
2513+ err = mce_notify_work_init();
2514+
2515 return err;
2516 }
2517 device_initcall_sync(mcheck_init_device);
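
mce_notify_irq() can be called from interrupt context, so on PREEMPT_RT the patch moves the sleepable part (waking /dev/mcelog readers, scheduling the trigger work, the ratelimited log message) into a dedicated kthread and only wakes that thread from the hot path. Below is a rough user-space analogue of that hand-off using POSIX threads; the kernel side needs no lock around wake_up_process(), the mutex here only exists to make the condition-variable usage correct. All names are invented for the sketch:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool pending, stop;

/* The work that must not run in the (emulated) interrupt context. */
static void do_notify_work(void)
{
        printf("notify: machine check events logged\n");
}

static void *helper_thread(void *unused)
{
        (void)unused;
        pthread_mutex_lock(&lock);
        while (!stop) {
                while (!pending && !stop)
                        pthread_cond_wait(&cond, &lock);
                if (pending) {
                        pending = false;
                        pthread_mutex_unlock(&lock);
                        do_notify_work();       /* sleepable context */
                        pthread_mutex_lock(&lock);
                }
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

/* Cheap side: record the event and wake the helper, nothing more. */
static void notify(void)
{
        pthread_mutex_lock(&lock);
        pending = true;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        pthread_t tid;

        pthread_create(&tid, NULL, helper_thread, NULL);
        notify();
        notify();               /* back-to-back events coalesce, as in the patch */
        sleep(1);

        pthread_mutex_lock(&lock);
        stop = true;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        pthread_join(tid, NULL);
        return 0;
}
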
2518diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
2519index 115c1ea..f50cca1 100644
2520--- a/arch/x86/kernel/cpu/perf_event.h
2521+++ b/arch/x86/kernel/cpu/perf_event.h
2522@@ -108,6 +108,7 @@ struct intel_shared_regs {
2523 struct er_account regs[EXTRA_REG_MAX];
2524 int refcnt; /* per-core: #HT threads */
2525 unsigned core_id; /* per-core: core id */
2526+ struct rcu_head rcu;
2527 };
2528
2529 #define MAX_LBR_ENTRIES 16
2530diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
2531index 70602f8..0632237 100644
2532--- a/arch/x86/kernel/cpu/perf_event_intel.c
2533+++ b/arch/x86/kernel/cpu/perf_event_intel.c
2534@@ -1721,7 +1721,7 @@ static void intel_pmu_cpu_dying(int cpu)
2535 pc = cpuc->shared_regs;
2536 if (pc) {
2537 if (pc->core_id == -1 || --pc->refcnt == 0)
2538- kfree(pc);
2539+ kfree_rcu(pc, rcu);
2540 cpuc->shared_regs = NULL;
2541 }
2542
2543diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
2544index b43200d..85f64b3 100644
2545--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
2546+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
2547@@ -2636,7 +2636,7 @@ static void __cpuinit uncore_cpu_dying(int cpu)
2548 box = *per_cpu_ptr(pmu->box, cpu);
2549 *per_cpu_ptr(pmu->box, cpu) = NULL;
2550 if (box && atomic_dec_and_test(&box->refcnt))
2551- kfree(box);
2552+ kfree_rcu(box, rcu);
2553 }
2554 }
2555 }
2556@@ -2666,7 +2666,8 @@ static int __cpuinit uncore_cpu_starting(int cpu)
2557 if (exist && exist->phys_id == phys_id) {
2558 atomic_inc(&exist->refcnt);
2559 *per_cpu_ptr(pmu->box, cpu) = exist;
2560- kfree(box);
2561+ if (box)
2562+ kfree_rcu(box, rcu);
2563 box = NULL;
2564 break;
2565 }
2566diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
2567index e68a455..c4e1028 100644
2568--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
2569+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
2570@@ -421,6 +421,7 @@ struct intel_uncore_box {
2571 struct hrtimer hrtimer;
2572 struct list_head list;
2573 struct intel_uncore_extra_reg shared_regs[0];
2574+ struct rcu_head rcu;
2575 };
2576
2577 #define UNCORE_BOX_FLAG_INITIATED 0
2578diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
2579index b653675..f16c07b 100644
2580--- a/arch/x86/kernel/dumpstack_64.c
2581+++ b/arch/x86/kernel/dumpstack_64.c
2582@@ -21,10 +21,14 @@
2583 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
2584
2585 static char x86_stack_ids[][8] = {
2586+#if DEBUG_STACK > 0
2587 [ DEBUG_STACK-1 ] = "#DB",
2588+#endif
2589 [ NMI_STACK-1 ] = "NMI",
2590 [ DOUBLEFAULT_STACK-1 ] = "#DF",
2591+#if STACKFAULT_STACK > 0
2592 [ STACKFAULT_STACK-1 ] = "#SS",
2593+#endif
2594 [ MCE_STACK-1 ] = "#MC",
2595 #if DEBUG_STKSZ > EXCEPTION_STKSZ
2596 [ N_EXCEPTION_STACKS ...
2597diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
2598index 9b9f18b..d15f575 100644
2599--- a/arch/x86/kernel/early_printk.c
2600+++ b/arch/x86/kernel/early_printk.c
2601@@ -169,25 +169,9 @@ static struct console early_serial_console = {
2602 .index = -1,
2603 };
2604
2605-/* Direct interface for emergencies */
2606-static struct console *early_console = &early_vga_console;
2607-static int __initdata early_console_initialized;
2608-
2609-asmlinkage void early_printk(const char *fmt, ...)
2610-{
2611- char buf[512];
2612- int n;
2613- va_list ap;
2614-
2615- va_start(ap, fmt);
2616- n = vscnprintf(buf, sizeof(buf), fmt, ap);
2617- early_console->write(early_console, buf, n);
2618- va_end(ap);
2619-}
2620-
2621 static inline void early_console_register(struct console *con, int keep_early)
2622 {
2623- if (early_console->index != -1) {
2624+ if (con->index != -1) {
2625 printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
2626 con->name);
2627 return;
2628@@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf)
2629 if (!buf)
2630 return 0;
2631
2632- if (early_console_initialized)
2633+ if (early_console)
2634 return 0;
2635- early_console_initialized = 1;
2636
2637 keep = (strstr(buf, "keep") != NULL);
2638
2639diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
2640index 6ed91d9..218e79a 100644
2641--- a/arch/x86/kernel/entry_32.S
2642+++ b/arch/x86/kernel/entry_32.S
2643@@ -364,14 +364,22 @@ ENTRY(resume_kernel)
2644 DISABLE_INTERRUPTS(CLBR_ANY)
2645 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
2646 jnz restore_all
2647-need_resched:
2648 movl TI_flags(%ebp), %ecx # need_resched set ?
2649 testb $_TIF_NEED_RESCHED, %cl
2650+ jnz 1f
2651+
2652+ cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
2653+ jnz restore_all
2654+ testl $_TIF_NEED_RESCHED_LAZY, %ecx
2655 jz restore_all
2656- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
2657+
2658+1: testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
2659 jz restore_all
2660 call preempt_schedule_irq
2661- jmp need_resched
2662+ movl TI_flags(%ebp), %ecx # need_resched set ?
2663+ testl $_TIF_NEED_RESCHED_MASK, %ecx
2664+ jnz 1b
2665+ jmp restore_all
2666 END(resume_kernel)
2667 #endif
2668 CFI_ENDPROC
2669@@ -607,7 +615,7 @@ ENDPROC(system_call)
2670 ALIGN
2671 RING0_PTREGS_FRAME # can't unwind into user space anyway
2672 work_pending:
2673- testb $_TIF_NEED_RESCHED, %cl
2674+ testl $_TIF_NEED_RESCHED_MASK, %ecx
2675 jz work_notifysig
2676 work_resched:
2677 call schedule
2678@@ -620,7 +628,7 @@ work_resched:
2679 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
2680 # than syscall tracing?
2681 jz restore_all
2682- testb $_TIF_NEED_RESCHED, %cl
2683+ testl $_TIF_NEED_RESCHED_MASK, %ecx
2684 jnz work_resched
2685
2686 work_notifysig: # deal with pending signals and
2687diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
2688index cb3c591..0b01d8d 100644
2689--- a/arch/x86/kernel/entry_64.S
2690+++ b/arch/x86/kernel/entry_64.S
2691@@ -673,8 +673,8 @@ sysret_check:
2692 /* Handle reschedules */
2693 /* edx: work, edi: workmask */
2694 sysret_careful:
2695- bt $TIF_NEED_RESCHED,%edx
2696- jnc sysret_signal
2697+ testl $_TIF_NEED_RESCHED_MASK,%edx
2698+ jz sysret_signal
2699 TRACE_IRQS_ON
2700 ENABLE_INTERRUPTS(CLBR_NONE)
2701 pushq_cfi %rdi
2702@@ -786,8 +786,8 @@ GLOBAL(int_with_check)
2703 /* First do a reschedule test. */
2704 /* edx: work, edi: workmask */
2705 int_careful:
2706- bt $TIF_NEED_RESCHED,%edx
2707- jnc int_very_careful
2708+ testl $_TIF_NEED_RESCHED_MASK,%edx
2709+ jz int_very_careful
2710 TRACE_IRQS_ON
2711 ENABLE_INTERRUPTS(CLBR_NONE)
2712 pushq_cfi %rdi
2713@@ -1094,8 +1094,8 @@ bad_iret:
2714 /* edi: workmask, edx: work */
2715 retint_careful:
2716 CFI_RESTORE_STATE
2717- bt $TIF_NEED_RESCHED,%edx
2718- jnc retint_signal
2719+ testl $_TIF_NEED_RESCHED_MASK,%edx
2720+ jz retint_signal
2721 TRACE_IRQS_ON
2722 ENABLE_INTERRUPTS(CLBR_NONE)
2723 pushq_cfi %rdi
2724@@ -1128,9 +1128,15 @@ retint_signal:
2725 ENTRY(retint_kernel)
2726 cmpl $0,TI_preempt_count(%rcx)
2727 jnz retint_restore_args
2728- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
2729+ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
2730+ jc 1f
2731+
2732+ cmpl $0,TI_preempt_lazy_count(%rcx)
2733+ jnz retint_restore_args
2734+ bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
2735 jnc retint_restore_args
2736- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
2737+
2738+1: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
2739 jnc retint_restore_args
2740 call preempt_schedule_irq
2741 jmp exit_intr
2742@@ -1337,6 +1343,7 @@ bad_gs:
2743 jmp 2b
2744 .previous
2745
2746+#ifndef CONFIG_PREEMPT_RT_FULL
2747 /* Call softirq on interrupt stack. Interrupts are off. */
2748 ENTRY(call_softirq)
2749 CFI_STARTPROC
2750@@ -1356,6 +1363,7 @@ ENTRY(call_softirq)
2751 ret
2752 CFI_ENDPROC
2753 END(call_softirq)
2754+#endif
2755
2756 #ifdef CONFIG_XEN
2757 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
2758@@ -1520,7 +1528,7 @@ paranoid_userspace:
2759 movq %rsp,%rdi /* &pt_regs */
2760 call sync_regs
2761 movq %rax,%rsp /* switch stack for scheduling */
2762- testl $_TIF_NEED_RESCHED,%ebx
2763+ testl $_TIF_NEED_RESCHED_MASK,%ebx
2764 jnz paranoid_schedule
2765 movl %ebx,%edx /* arg3: thread flags */
2766 TRACE_IRQS_ON
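
The entry_32.S/entry_64.S changes encode the lazy-preemption policy in assembly: preempt right away if TIF_NEED_RESCHED is set; otherwise preempt only if TIF_NEED_RESCHED_LAZY is set and preempt_lazy_count is zero; and in either case only when preempt_count is zero and interrupts were enabled in the interrupted frame. The same decision written as a plain C predicate, with the flag values and structure made up purely for illustration:

#include <stdbool.h>
#include <stdio.h>

#define _TIF_NEED_RESCHED       (1u << 3)
#define _TIF_NEED_RESCHED_LAZY  (1u << 9)

struct thread_state {
        unsigned int flags;
        int preempt_count;       /* non-zero: preemption disabled */
        int preempt_lazy_count;  /* non-zero: only honour the non-lazy bit */
        bool irqs_were_on;       /* EFLAGS.IF in the interrupted frame */
};

static bool should_preempt_irq(const struct thread_state *t)
{
        if (t->preempt_count || !t->irqs_were_on)
                return false;
        if (t->flags & _TIF_NEED_RESCHED)
                return true;
        return !t->preempt_lazy_count && (t->flags & _TIF_NEED_RESCHED_LAZY);
}

int main(void)
{
        struct thread_state t = { _TIF_NEED_RESCHED_LAZY, 0, 1, true };

        printf("%d\n", should_preempt_irq(&t)); /* 0: lazy count blocks it */
        t.preempt_lazy_count = 0;
        printf("%d\n", should_preempt_irq(&t)); /* 1: lazy request honoured */
        return 0;
}
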
2767diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
2768index e28670f..5ce3c25 100644
2769--- a/arch/x86/kernel/hpet.c
2770+++ b/arch/x86/kernel/hpet.c
2771@@ -8,6 +8,7 @@
2772 #include <linux/slab.h>
2773 #include <linux/hpet.h>
2774 #include <linux/init.h>
2775+#include <linux/dmi.h>
2776 #include <linux/cpu.h>
2777 #include <linux/pm.h>
2778 #include <linux/io.h>
2779@@ -573,6 +574,30 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
2780 #define RESERVE_TIMERS 0
2781 #endif
2782
2783+static int __init dmi_disable_hpet_msi(const struct dmi_system_id *d)
2784+{
2785+ hpet_msi_disable = 1;
2786+ return 0;
2787+}
2788+
2789+static struct dmi_system_id __initdata dmi_hpet_table[] = {
2790+ /*
2791+ * MSI based per cpu timers lose interrupts when intel_idle()
2792+ * is enabled - independent of the c-state. With idle=poll the
2793+ * problem cannot be observed. We have no idea yet, whether
2794+ * this is a W510 specific issue or a general chipset oddity.
2795+ */
2796+ {
2797+ .callback = dmi_disable_hpet_msi,
2798+ .ident = "Lenovo W510",
2799+ .matches = {
2800+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
2801+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W510"),
2802+ },
2803+ },
2804+ {}
2805+};
2806+
2807 static void hpet_msi_capability_lookup(unsigned int start_timer)
2808 {
2809 unsigned int id;
2810@@ -580,6 +605,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
2811 unsigned int num_timers_used = 0;
2812 int i;
2813
2814+ dmi_check_system(dmi_hpet_table);
2815+
2816 if (hpet_msi_disable)
2817 return;
2818
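
The new dmi_hpet_table entry is a standard DMI quirk: a table of vendor/product string matches plus a callback that flips a global (here hpet_msi_disable) when the running machine matches, and dmi_check_system() is now run before the MSI timers are configured. A stripped-down user-space model of that table-driven matching follows; the kernel's DMI_MATCH does substring matching, the exact compare here just keeps the sketch short, and all identifiers are invented:

#include <stdio.h>
#include <string.h>

static int hpet_msi_disable;

struct quirk {
        const char *ident;
        const char *vendor;     /* expected system vendor string */
        const char *product;    /* expected product version string */
        int (*callback)(const struct quirk *q);
};

static int disable_hpet_msi(const struct quirk *q)
{
        printf("quirk: %s -> disabling HPET MSI timers\n", q->ident);
        hpet_msi_disable = 1;
        return 0;
}

static const struct quirk quirks[] = {
        { "Lenovo W510", "LENOVO", "ThinkPad W510", disable_hpet_msi },
        { NULL, NULL, NULL, NULL }      /* terminator, like the empty {} entry */
};

static void check_system(const char *vendor, const char *product)
{
        const struct quirk *q;

        for (q = quirks; q->ident; q++)
                if (!strcmp(q->vendor, vendor) && !strcmp(q->product, product))
                        q->callback(q);
}

int main(void)
{
        check_system("LENOVO", "ThinkPad W510");
        printf("hpet_msi_disable = %d\n", hpet_msi_disable);
        return 0;
}
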
2819diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
2820index 344faf8..f60ecc0 100644
2821--- a/arch/x86/kernel/irq_32.c
2822+++ b/arch/x86/kernel/irq_32.c
2823@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
2824 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
2825 }
2826
2827+#ifndef CONFIG_PREEMPT_RT_FULL
2828 asmlinkage void do_softirq(void)
2829 {
2830 unsigned long flags;
2831@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)
2832
2833 local_irq_restore(flags);
2834 }
2835+#endif
2836
2837 bool handle_irq(unsigned irq, struct pt_regs *regs)
2838 {
2839diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
2840index d04d3ec..831f247 100644
2841--- a/arch/x86/kernel/irq_64.c
2842+++ b/arch/x86/kernel/irq_64.c
2843@@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
2844 return true;
2845 }
2846
2847-
2848+#ifndef CONFIG_PREEMPT_RT_FULL
2849 extern void call_softirq(void);
2850
2851 asmlinkage void do_softirq(void)
2852@@ -108,3 +108,4 @@ asmlinkage void do_softirq(void)
2853 }
2854 local_irq_restore(flags);
2855 }
2856+#endif
2857diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
2858index ca8f703..129b8bb 100644
2859--- a/arch/x86/kernel/irq_work.c
2860+++ b/arch/x86/kernel/irq_work.c
2861@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
2862 irq_exit();
2863 }
2864
2865+#ifndef CONFIG_PREEMPT_RT_FULL
2866 void arch_irq_work_raise(void)
2867 {
2868 #ifdef CONFIG_X86_LOCAL_APIC
2869@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
2870 apic_wait_icr_idle();
2871 #endif
2872 }
2873+#endif
2874diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
2875index b5a8905..139ad27 100644
2876--- a/arch/x86/kernel/process_32.c
2877+++ b/arch/x86/kernel/process_32.c
2878@@ -36,6 +36,7 @@
2879 #include <linux/uaccess.h>
2880 #include <linux/io.h>
2881 #include <linux/kdebug.h>
2882+#include <linux/highmem.h>
2883
2884 #include <asm/pgtable.h>
2885 #include <asm/ldt.h>
2886@@ -216,6 +217,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
2887 }
2888 EXPORT_SYMBOL_GPL(start_thread);
2889
2890+#ifdef CONFIG_PREEMPT_RT_FULL
2891+static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
2892+{
2893+ int i;
2894+
2895+ /*
2896+ * Clear @prev's kmap_atomic mappings
2897+ */
2898+ for (i = 0; i < prev_p->kmap_idx; i++) {
2899+ int idx = i + KM_TYPE_NR * smp_processor_id();
2900+ pte_t *ptep = kmap_pte - idx;
2901+
2902+ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
2903+ }
2904+ /*
2905+ * Restore @next_p's kmap_atomic mappings
2906+ */
2907+ for (i = 0; i < next_p->kmap_idx; i++) {
2908+ int idx = i + KM_TYPE_NR * smp_processor_id();
2909+
2910+ if (!pte_none(next_p->kmap_pte[i]))
2911+ set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
2912+ }
2913+}
2914+#else
2915+static inline void
2916+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
2917+#endif
2918+
2919
2920 /*
2921 * switch_to(x,y) should switch tasks from x to y.
2922@@ -295,6 +325,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
2923 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
2924 __switch_to_xtra(prev_p, next_p, tss);
2925
2926+ switch_kmaps(prev_p, next_p);
2927+
2928 /*
2929 * Leave lazy mode, flushing any hypercalls made here.
2930 * This must be done before restoring TLS segments so
2931diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
2932index d6bf1f3..4a3a5dd 100644
2933--- a/arch/x86/kernel/signal.c
2934+++ b/arch/x86/kernel/signal.c
2935@@ -808,6 +808,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
2936 mce_notify_process();
2937 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
2938
2939+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
2940+ if (unlikely(current->forced_info.si_signo)) {
2941+ struct task_struct *t = current;
2942+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
2943+ t->forced_info.si_signo = 0;
2944+ }
2945+#endif
2946+
2947 if (thread_info_flags & _TIF_UPROBE)
2948 uprobe_notify_resume(regs);
2949
2950diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
2951index ecffca1..be18ff6 100644
2952--- a/arch/x86/kernel/traps.c
2953+++ b/arch/x86/kernel/traps.c
2954@@ -85,9 +85,21 @@ static inline void conditional_sti(struct pt_regs *regs)
2955 local_irq_enable();
2956 }
2957
2958-static inline void preempt_conditional_sti(struct pt_regs *regs)
2959+static inline void conditional_sti_ist(struct pt_regs *regs)
2960 {
2961+#ifdef CONFIG_X86_64
2962+ /*
2963+ * X86_64 uses a per CPU stack on the IST for certain traps
2964+ * like int3. The task can not be preempted when using one
2965+ * of these stacks, thus preemption must be disabled, otherwise
2966+ * the stack can be corrupted if the task is scheduled out,
2967+ * and another task comes in and uses this stack.
2968+ *
2969+ * On x86_32 the task keeps its own stack and it is OK if the
2970+ * task schedules out.
2971+ */
2972 inc_preempt_count();
2973+#endif
2974 if (regs->flags & X86_EFLAGS_IF)
2975 local_irq_enable();
2976 }
2977@@ -98,11 +110,13 @@ static inline void conditional_cli(struct pt_regs *regs)
2978 local_irq_disable();
2979 }
2980
2981-static inline void preempt_conditional_cli(struct pt_regs *regs)
2982+static inline void conditional_cli_ist(struct pt_regs *regs)
2983 {
2984 if (regs->flags & X86_EFLAGS_IF)
2985 local_irq_disable();
2986+#ifdef CONFIG_X86_64
2987 dec_preempt_count();
2988+#endif
2989 }
2990
2991 static int __kprobes
2992@@ -229,9 +243,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
2993 exception_enter(regs);
2994 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
2995 X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
2996- preempt_conditional_sti(regs);
2997+ conditional_sti_ist(regs);
2998 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
2999- preempt_conditional_cli(regs);
3000+ conditional_cli_ist(regs);
3001 }
3002 exception_exit(regs);
3003 }
3004@@ -331,9 +345,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
3005 * as we may switch to the interrupt stack.
3006 */
3007 debug_stack_usage_inc();
3008- preempt_conditional_sti(regs);
3009+ conditional_sti_ist(regs);
3010 do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
3011- preempt_conditional_cli(regs);
3012+ conditional_cli_ist(regs);
3013 debug_stack_usage_dec();
3014 exit:
3015 exception_exit(regs);
3016@@ -438,12 +452,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
3017 debug_stack_usage_inc();
3018
3019 /* It's safe to allow irq's after DR6 has been saved */
3020- preempt_conditional_sti(regs);
3021+ conditional_sti_ist(regs);
3022
3023 if (regs->flags & X86_VM_MASK) {
3024 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
3025 X86_TRAP_DB);
3026- preempt_conditional_cli(regs);
3027+ conditional_cli_ist(regs);
3028 debug_stack_usage_dec();
3029 goto exit;
3030 }
3031@@ -463,7 +477,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
3032 si_code = get_si_code(tsk->thread.debugreg6);
3033 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
3034 send_sigtrap(tsk, regs, error_code, si_code);
3035- preempt_conditional_cli(regs);
3036+ conditional_cli_ist(regs);
3037 debug_stack_usage_dec();
3038
3039 exit:
3040diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
3041index 9a51121..fb485ba 100644
3042--- a/arch/x86/kvm/x86.c
3043+++ b/arch/x86/kvm/x86.c
3044@@ -5238,6 +5238,13 @@ int kvm_arch_init(void *opaque)
3045 goto out;
3046 }
3047
3048+#ifdef CONFIG_PREEMPT_RT_FULL
3049+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3050+ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
3051+ return -EOPNOTSUPP;
3052+ }
3053+#endif
3054+
3055 r = kvm_mmu_module_init();
3056 if (r)
3057 goto out_free_percpu;
3058diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
3059index 4f7d793..9cc2653 100644
3060--- a/arch/x86/mm/fault.c
3061+++ b/arch/x86/mm/fault.c
3062@@ -1110,7 +1110,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
3063 * If we're in an interrupt, have no user context or are running
3064 * in an atomic region then we must not take the fault:
3065 */
3066- if (unlikely(in_atomic() || !mm)) {
3067+ if (unlikely(!mm || pagefault_disabled())) {
3068 bad_area_nosemaphore(regs, error_code, address);
3069 return;
3070 }
3071diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
3072index 6f31ee5..01f7c99 100644
3073--- a/arch/x86/mm/highmem_32.c
3074+++ b/arch/x86/mm/highmem_32.c
3075@@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap);
3076 */
3077 void *kmap_atomic_prot(struct page *page, pgprot_t prot)
3078 {
3079+ pte_t pte = mk_pte(page, prot);
3080 unsigned long vaddr;
3081 int idx, type;
3082
3083@@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
3084 idx = type + KM_TYPE_NR*smp_processor_id();
3085 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3086 BUG_ON(!pte_none(*(kmap_pte-idx)));
3087- set_pte(kmap_pte-idx, mk_pte(page, prot));
3088+#ifdef CONFIG_PREEMPT_RT_FULL
3089+ current->kmap_pte[type] = pte;
3090+#endif
3091+ set_pte(kmap_pte-idx, pte);
3092 arch_flush_lazy_mmu_mode();
3093
3094 return (void *)vaddr;
3095@@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr)
3096 * is a bad idea also, in case the page changes cacheability
3097 * attributes or becomes a protected page in a hypervisor.
3098 */
3099+#ifdef CONFIG_PREEMPT_RT_FULL
3100+ current->kmap_pte[type] = __pte(0);
3101+#endif
3102 kpte_clear_flush(kmap_pte-idx, vaddr);
3103 kmap_atomic_idx_pop();
3104 arch_flush_lazy_mmu_mode();
3105diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
3106index 7b179b4..62377d6 100644
3107--- a/arch/x86/mm/iomap_32.c
3108+++ b/arch/x86/mm/iomap_32.c
3109@@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
3110
3111 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
3112 {
3113+ pte_t pte = pfn_pte(pfn, prot);
3114 unsigned long vaddr;
3115 int idx, type;
3116
3117@@ -64,7 +65,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
3118 type = kmap_atomic_idx_push();
3119 idx = type + KM_TYPE_NR * smp_processor_id();
3120 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3121- set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
3122+ WARN_ON(!pte_none(*(kmap_pte - idx)));
3123+
3124+#ifdef CONFIG_PREEMPT_RT_FULL
3125+ current->kmap_pte[type] = pte;
3126+#endif
3127+ set_pte(kmap_pte - idx, pte);
3128 arch_flush_lazy_mmu_mode();
3129
3130 return (void *)vaddr;
3131@@ -110,6 +116,9 @@ iounmap_atomic(void __iomem *kvaddr)
3132 * is a bad idea also, in case the page changes cacheability
3133 * attributes or becomes a protected page in a hypervisor.
3134 */
3135+#ifdef CONFIG_PREEMPT_RT_FULL
3136+ current->kmap_pte[type] = __pte(0);
3137+#endif
3138 kpte_clear_flush(kmap_pte-idx, vaddr);
3139 kmap_atomic_idx_pop();
3140 }
3141diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
3142index 4b7bc8d..d57c257 100644
3143--- a/arch/xtensa/mm/fault.c
3144+++ b/arch/xtensa/mm/fault.c
3145@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
3146 /* If we're in an interrupt or have no user
3147 * context, we must not take the fault..
3148 */
3149- if (in_atomic() || !mm) {
3150+ if (!mm || pagefault_disabled()) {
3151 bad_page_fault(regs, address, SIGSEGV);
3152 return;
3153 }
3154diff --git a/block/blk-core.c b/block/blk-core.c
3155index c973249..372ddb3 100644
3156--- a/block/blk-core.c
3157+++ b/block/blk-core.c
3158@@ -241,7 +241,7 @@ EXPORT_SYMBOL(blk_delay_queue);
3159 **/
3160 void blk_start_queue(struct request_queue *q)
3161 {
3162- WARN_ON(!irqs_disabled());
3163+ WARN_ON_NONRT(!irqs_disabled());
3164
3165 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
3166 __blk_run_queue(q);
3167@@ -2929,7 +2929,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
3168 blk_run_queue_async(q);
3169 else
3170 __blk_run_queue(q);
3171- spin_unlock(q->queue_lock);
3172+ spin_unlock_irq(q->queue_lock);
3173 }
3174
3175 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
3176@@ -2977,7 +2977,6 @@ EXPORT_SYMBOL(blk_check_plugged);
3177 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3178 {
3179 struct request_queue *q;
3180- unsigned long flags;
3181 struct request *rq;
3182 LIST_HEAD(list);
3183 unsigned int depth;
3184@@ -2998,11 +2997,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3185 q = NULL;
3186 depth = 0;
3187
3188- /*
3189- * Save and disable interrupts here, to avoid doing it for every
3190- * queue lock we have to take.
3191- */
3192- local_irq_save(flags);
3193 while (!list_empty(&list)) {
3194 rq = list_entry_rq(list.next);
3195 list_del_init(&rq->queuelist);
3196@@ -3015,7 +3009,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3197 queue_unplugged(q, depth, from_schedule);
3198 q = rq->q;
3199 depth = 0;
3200- spin_lock(q->queue_lock);
3201+ spin_lock_irq(q->queue_lock);
3202 }
3203
3204 /*
3205@@ -3042,8 +3036,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3206 */
3207 if (q)
3208 queue_unplugged(q, depth, from_schedule);
3209-
3210- local_irq_restore(flags);
3211 }
3212
3213 void blk_finish_plug(struct blk_plug *plug)
3214diff --git a/block/blk-ioc.c b/block/blk-ioc.c
3215index fab4cdd..fb21ad5 100644
3216--- a/block/blk-ioc.c
3217+++ b/block/blk-ioc.c
3218@@ -8,6 +8,7 @@
3219 #include <linux/blkdev.h>
3220 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
3221 #include <linux/slab.h>
3222+#include <linux/delay.h>
3223
3224 #include "blk.h"
3225
3226@@ -110,7 +111,7 @@ static void ioc_release_fn(struct work_struct *work)
3227 spin_unlock(q->queue_lock);
3228 } else {
3229 spin_unlock_irqrestore(&ioc->lock, flags);
3230- cpu_relax();
3231+ cpu_chill();
3232 spin_lock_irqsave_nested(&ioc->lock, flags, 1);
3233 }
3234 }
3235@@ -188,7 +189,7 @@ retry:
3236 spin_unlock(icq->q->queue_lock);
3237 } else {
3238 spin_unlock_irqrestore(&ioc->lock, flags);
3239- cpu_relax();
3240+ cpu_chill();
3241 goto retry;
3242 }
3243 }
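
cpu_chill() replaces cpu_relax() in these trylock-retry loops because on PREEMPT_RT the lock owner may itself be a preemptible task; busy-spinning could then starve it, so the retry path sleeps briefly instead. A user-space illustration of the same idea with a pthread trylock loop, where the 1 ms nanosleep stands in for cpu_chill():

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t resource_lock = PTHREAD_MUTEX_INITIALIZER;

/* Sleep briefly instead of spinning, so the lock holder can make progress. */
static void chill(void)
{
        struct timespec ts = { 0, 1 * 1000 * 1000 }; /* 1 ms */
        nanosleep(&ts, NULL);
}

static void locked_work(void)
{
        while (pthread_mutex_trylock(&resource_lock) == EBUSY)
                chill();        /* was: spin with cpu_relax() */

        /* ... critical section ... */
        pthread_mutex_unlock(&resource_lock);
}

int main(void)
{
        locked_work();
        puts("done");
        return 0;
}
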
3244diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
3245index 58916af..f7ca9b4 100644
3246--- a/block/blk-iopoll.c
3247+++ b/block/blk-iopoll.c
3248@@ -38,6 +38,7 @@ void blk_iopoll_sched(struct blk_iopoll *iop)
3249 list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
3250 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3251 local_irq_restore(flags);
3252+ preempt_check_resched_rt();
3253 }
3254 EXPORT_SYMBOL(blk_iopoll_sched);
3255
3256@@ -135,6 +136,7 @@ static void blk_iopoll_softirq(struct softirq_action *h)
3257 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3258
3259 local_irq_enable();
3260+ preempt_check_resched_rt();
3261 }
3262
3263 /**
3264@@ -204,6 +206,7 @@ static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
3265 &__get_cpu_var(blk_cpu_iopoll));
3266 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3267 local_irq_enable();
3268+ preempt_check_resched_rt();
3269 }
3270
3271 return NOTIFY_OK;
3272diff --git a/block/blk-softirq.c b/block/blk-softirq.c
3273index 467c8de..3fe2368 100644
3274--- a/block/blk-softirq.c
3275+++ b/block/blk-softirq.c
3276@@ -51,6 +51,7 @@ static void trigger_softirq(void *data)
3277 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3278
3279 local_irq_restore(flags);
3280+ preempt_check_resched_rt();
3281 }
3282
3283 /*
3284@@ -93,6 +94,7 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
3285 &__get_cpu_var(blk_cpu_done));
3286 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3287 local_irq_enable();
3288+ preempt_check_resched_rt();
3289 }
3290
3291 return NOTIFY_OK;
3292@@ -150,6 +152,7 @@ do_local:
3293 goto do_local;
3294
3295 local_irq_restore(flags);
3296+ preempt_check_resched_rt();
3297 }
3298
3299 /**
3300diff --git a/crypto/algapi.c b/crypto/algapi.c
3301index c3b9bfe..3574066 100644
3302--- a/crypto/algapi.c
3303+++ b/crypto/algapi.c
3304@@ -683,13 +683,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
3305
3306 int crypto_register_notifier(struct notifier_block *nb)
3307 {
3308- return blocking_notifier_chain_register(&crypto_chain, nb);
3309+ return srcu_notifier_chain_register(&crypto_chain, nb);
3310 }
3311 EXPORT_SYMBOL_GPL(crypto_register_notifier);
3312
3313 int crypto_unregister_notifier(struct notifier_block *nb)
3314 {
3315- return blocking_notifier_chain_unregister(&crypto_chain, nb);
3316+ return srcu_notifier_chain_unregister(&crypto_chain, nb);
3317 }
3318 EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
3319
3320diff --git a/crypto/api.c b/crypto/api.c
3321index 033a714..8ff072c 100644
3322--- a/crypto/api.c
3323+++ b/crypto/api.c
3324@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
3325 DECLARE_RWSEM(crypto_alg_sem);
3326 EXPORT_SYMBOL_GPL(crypto_alg_sem);
3327
3328-BLOCKING_NOTIFIER_HEAD(crypto_chain);
3329+SRCU_NOTIFIER_HEAD(crypto_chain);
3330 EXPORT_SYMBOL_GPL(crypto_chain);
3331
3332 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
3333@@ -237,10 +237,10 @@ int crypto_probing_notify(unsigned long val, void *v)
3334 {
3335 int ok;
3336
3337- ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3338+ ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3339 if (ok == NOTIFY_DONE) {
3340 request_module("cryptomgr");
3341- ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3342+ ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3343 }
3344
3345 return ok;
3346diff --git a/crypto/internal.h b/crypto/internal.h
3347index 9ebedae..8cbe3dc 100644
3348--- a/crypto/internal.h
3349+++ b/crypto/internal.h
3350@@ -48,7 +48,7 @@ struct crypto_larval {
3351
3352 extern struct list_head crypto_alg_list;
3353 extern struct rw_semaphore crypto_alg_sem;
3354-extern struct blocking_notifier_head crypto_chain;
3355+extern struct srcu_notifier_head crypto_chain;
3356
3357 #ifdef CONFIG_PROC_FS
3358 void __init crypto_init_proc(void);
3359@@ -136,7 +136,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg)
3360
3361 static inline void crypto_notify(unsigned long val, void *v)
3362 {
3363- blocking_notifier_call_chain(&crypto_chain, val, v);
3364+ srcu_notifier_call_chain(&crypto_chain, val, v);
3365 }
3366
3367 #endif /* _CRYPTO_INTERNAL_H */
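
These hunks switch the crypto notifier chain from a blocking notifier to an SRCU notifier, so the notify path no longer has to take the chain's rw_semaphore. Either way the underlying structure is the same: a list of callbacks invoked in order until one claims the event. A minimal, single-threaded sketch of such a chain (no locking of any kind, names invented):

#include <stdio.h>

#define NOTIFY_DONE 0
#define NOTIFY_OK   1

struct notifier_block {
        int (*call)(struct notifier_block *nb, unsigned long event, void *data);
        struct notifier_block *next;
};

static struct notifier_block *chain_head;

static void chain_register(struct notifier_block *nb)
{
        nb->next = chain_head;
        chain_head = nb;
}

/* Walk the chain until a callback reports something other than NOTIFY_DONE. */
static int chain_call(unsigned long event, void *data)
{
        struct notifier_block *nb;
        int ret = NOTIFY_DONE;

        for (nb = chain_head; nb; nb = nb->next) {
                ret = nb->call(nb, event, data);
                if (ret != NOTIFY_DONE)
                        break;
        }
        return ret;
}

static int print_event(struct notifier_block *nb, unsigned long event, void *data)
{
        (void)nb; (void)data;
        printf("event %lu seen\n", event);
        return NOTIFY_OK;
}

int main(void)
{
        struct notifier_block nb = { print_event, NULL };

        chain_register(&nb);
        printf("ret = %d\n", chain_call(42, NULL));
        return 0;
}
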
3368diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
3369index d8af325..ad3130d 100644
3370--- a/drivers/ata/libata-sff.c
3371+++ b/drivers/ata/libata-sff.c
3372@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf,
3373 unsigned long flags;
3374 unsigned int consumed;
3375
3376- local_irq_save(flags);
3377+ local_irq_save_nort(flags);
3378 consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
3379- local_irq_restore(flags);
3380+ local_irq_restore_nort(flags);
3381
3382 return consumed;
3383 }
3384@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
3385 unsigned long flags;
3386
3387 /* FIXME: use a bounce buffer */
3388- local_irq_save(flags);
3389+ local_irq_save_nort(flags);
3390 buf = kmap_atomic(page);
3391
3392 /* do the actual data transfer */
3393@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
3394 do_write);
3395
3396 kunmap_atomic(buf);
3397- local_irq_restore(flags);
3398+ local_irq_restore_nort(flags);
3399 } else {
3400 buf = page_address(page);
3401 ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
3402@@ -864,7 +864,7 @@ next_sg:
3403 unsigned long flags;
3404
3405 /* FIXME: use bounce buffer */
3406- local_irq_save(flags);
3407+ local_irq_save_nort(flags);
3408 buf = kmap_atomic(page);
3409
3410 /* do the actual data transfer */
3411@@ -872,7 +872,7 @@ next_sg:
3412 count, rw);
3413
3414 kunmap_atomic(buf);
3415- local_irq_restore(flags);
3416+ local_irq_restore_nort(flags);
3417 } else {
3418 buf = page_address(page);
3419 consumed = ap->ops->sff_data_xfer(dev, buf + offset,
3420diff --git a/drivers/char/random.c b/drivers/char/random.c
3421index 57d4b15..9d6c416 100644
3422--- a/drivers/char/random.c
3423+++ b/drivers/char/random.c
3424@@ -445,7 +445,7 @@ static struct entropy_store input_pool = {
3425 .poolinfo = &poolinfo_table[0],
3426 .name = "input",
3427 .limit = 1,
3428- .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
3429+ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
3430 .pool = input_pool_data
3431 };
3432
3433@@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = {
3434 .name = "blocking",
3435 .limit = 1,
3436 .pull = &input_pool,
3437- .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
3438+ .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
3439 .pool = blocking_pool_data
3440 };
3441
3442@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = {
3443 .poolinfo = &poolinfo_table[1],
3444 .name = "nonblocking",
3445 .pull = &input_pool,
3446- .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
3447+ .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
3448 .pool = nonblocking_pool_data
3449 };
3450
3451@@ -676,9 +676,12 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
3452 preempt_disable();
3453 /* if over the trickle threshold, use only 1 in 4096 samples */
3454 if (input_pool.entropy_count > trickle_thresh &&
3455- ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff))
3456- goto out;
3457+ ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff)) {
3458+ preempt_enable();
3459+ return;
3460+ }
3461
3462+ preempt_enable();
3463 sample.jiffies = jiffies;
3464 sample.cycles = get_cycles();
3465 sample.num = num;
3466@@ -719,8 +722,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
3467 credit_entropy_bits(&input_pool,
3468 min_t(int, fls(delta>>1), 11));
3469 }
3470-out:
3471- preempt_enable();
3472 }
3473
3474 void add_input_randomness(unsigned int type, unsigned int code,
3475@@ -741,18 +742,16 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
3476
3477 static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
3478
3479-void add_interrupt_randomness(int irq, int irq_flags)
3480+void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
3481 {
3482 struct entropy_store *r;
3483 struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness);
3484- struct pt_regs *regs = get_irq_regs();
3485 unsigned long now = jiffies;
3486 __u32 input[4], cycles = get_cycles();
3487
3488 input[0] = cycles ^ jiffies;
3489 input[1] = irq;
3490- if (regs) {
3491- __u64 ip = instruction_pointer(regs);
3492+ if (ip) {
3493 input[2] = ip;
3494 input[3] = ip >> 32;
3495 }
3496@@ -766,7 +765,11 @@ void add_interrupt_randomness(int irq, int irq_flags)
3497 fast_pool->last = now;
3498
3499 r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
3500+#ifndef CONFIG_PREEMPT_RT_FULL
3501 __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
3502+#else
3503+ mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
3504+#endif
3505 /*
3506 * If we don't have a valid cycle counter, and we see
3507 * back-to-back timer interrupts, then skip giving credit for
3508diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
3509index 32cb929..ac0bb2e 100644
3510--- a/drivers/clocksource/tcb_clksrc.c
3511+++ b/drivers/clocksource/tcb_clksrc.c
3512@@ -23,8 +23,7 @@
3513 * this 32 bit free-running counter. the second channel is not used.
3514 *
3515 * - The third channel may be used to provide a 16-bit clockevent
3516- * source, used in either periodic or oneshot mode. This runs
3517- * at 32 KiHZ, and can handle delays of up to two seconds.
3518+ * source, used in either periodic or oneshot mode.
3519 *
3520 * A boot clocksource and clockevent source are also currently needed,
3521 * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
3522@@ -74,6 +73,7 @@ static struct clocksource clksrc = {
3523 struct tc_clkevt_device {
3524 struct clock_event_device clkevt;
3525 struct clk *clk;
3526+ u32 freq;
3527 void __iomem *regs;
3528 };
3529
3530@@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt)
3531 return container_of(clkevt, struct tc_clkevt_device, clkevt);
3532 }
3533
3534-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
3535- * because using one of the divided clocks would usually mean the
3536- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
3537- *
3538- * A divided clock could be good for high resolution timers, since
3539- * 30.5 usec resolution can seem "low".
3540- */
3541 static u32 timer_clock;
3542
3543 static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
3544@@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
3545 case CLOCK_EVT_MODE_PERIODIC:
3546 clk_enable(tcd->clk);
3547
3548- /* slow clock, count up to RC, then irq and restart */
3549+ /* count up to RC, then irq and restart */
3550 __raw_writel(timer_clock
3551 | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
3552 regs + ATMEL_TC_REG(2, CMR));
3553- __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
3554+ __raw_writel((tcd->freq + HZ/2)/HZ,
3555+ tcaddr + ATMEL_TC_REG(2, RC));
3556
3557 /* Enable clock and interrupts on RC compare */
3558 __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
3559@@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
3560 case CLOCK_EVT_MODE_ONESHOT:
3561 clk_enable(tcd->clk);
3562
3563- /* slow clock, count up to RC, then irq and stop */
3564+ /* count up to RC, then irq and stop */
3565 __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
3566 | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
3567 regs + ATMEL_TC_REG(2, CMR));
3568@@ -158,8 +152,12 @@ static struct tc_clkevt_device clkevt = {
3569 .features = CLOCK_EVT_FEAT_PERIODIC
3570 | CLOCK_EVT_FEAT_ONESHOT,
3571 .shift = 32,
3572+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
3573 /* Should be lower than at91rm9200's system timer */
3574 .rating = 125,
3575+#else
3576+ .rating = 200,
3577+#endif
3578 .set_next_event = tc_next_event,
3579 .set_mode = tc_mode,
3580 },
3581@@ -185,8 +183,9 @@ static struct irqaction tc_irqaction = {
3582 .handler = ch2_irq,
3583 };
3584
3585-static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
3586+static void __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
3587 {
3588+ unsigned divisor = atmel_tc_divisors[divisor_idx];
3589 struct clk *t2_clk = tc->clk[2];
3590 int irq = tc->irq[2];
3591
3592@@ -194,11 +193,17 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
3593 clkevt.clk = t2_clk;
3594 tc_irqaction.dev_id = &clkevt;
3595
3596- timer_clock = clk32k_divisor_idx;
3597+ timer_clock = divisor_idx;
3598
3599- clkevt.clkevt.mult = div_sc(32768, NSEC_PER_SEC, clkevt.clkevt.shift);
3600- clkevt.clkevt.max_delta_ns
3601- = clockevent_delta2ns(0xffff, &clkevt.clkevt);
3602+ if (!divisor)
3603+ clkevt.freq = 32768;
3604+ else
3605+ clkevt.freq = clk_get_rate(t2_clk)/divisor;
3606+
3607+ clkevt.clkevt.mult = div_sc(clkevt.freq, NSEC_PER_SEC,
3608+ clkevt.clkevt.shift);
3609+ clkevt.clkevt.max_delta_ns =
3610+ clockevent_delta2ns(0xffff, &clkevt.clkevt);
3611 clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
3612 clkevt.clkevt.cpumask = cpumask_of(0);
3613
3614@@ -327,8 +332,11 @@ static int __init tcb_clksrc_init(void)
3615 clocksource_register_hz(&clksrc, divided_rate);
3616
3617 /* channel 2: periodic and oneshot timer support */
3618+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
3619 setup_clkevents(tc, clk32k_divisor_idx);
3620-
3621+#else
3622+ setup_clkevents(tc, best_divisor_idx);
3623+#endif
3624 return 0;
3625 }
3626 arch_initcall(tcb_clksrc_init);
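
setup_clkevents() now takes an arbitrary divisor index: with the slow-clock Kconfig option it keeps the 32 kHz clock, otherwise clkevt.freq comes from the divided peripheral clock, and mult is recomputed so nanosecond deltas convert to timer ticks with a multiply and shift. The sketch below mirrors the kernel's div_sc()/clockevent_delta2ns() arithmetic and prints the numbers for the 32768 Hz slow clock versus an assumed 2.625 MHz divided clock (the second frequency is purely illustrative):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* mult such that ticks ~= (ns * mult) >> shift, like the kernel's div_sc(). */
static uint64_t calc_mult(uint64_t freq, unsigned int shift)
{
        return (freq << shift) / NSEC_PER_SEC;
}

static uint64_t ns_to_ticks(uint64_t ns, uint64_t mult, unsigned int shift)
{
        return (ns * mult) >> shift;
}

/* Rough equivalent of clockevent_delta2ns() for the 16-bit TC counter. */
static uint64_t max_delta_ns(uint64_t mult, unsigned int shift)
{
        return (0xffffULL << shift) / mult;
}

static void report(const char *name, uint64_t freq)
{
        unsigned int shift = 32;
        uint64_t mult = calc_mult(freq, shift);

        printf("%s: mult=%llu, 1 ms -> %llu ticks, max delta ~ %llu us\n",
               name, (unsigned long long)mult,
               (unsigned long long)ns_to_ticks(1000000, mult, shift),
               (unsigned long long)(max_delta_ns(mult, shift) / 1000));
}

int main(void)
{
        report("32768 Hz slow clock", 32768);        /* ~2 s max delta  */
        report("2.625 MHz divided clock", 2625000);  /* ~25 ms max delta */
        return 0;
}
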
3627diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
3628index 0577df8..4cebc9a 100644
3629--- a/drivers/dma/Makefile
3630+++ b/drivers/dma/Makefile
3631@@ -35,3 +35,4 @@ obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
3632 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
3633 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
3634 obj-$(CONFIG_XILINX_DMA_ENGINES) += xilinx/
3635+obj-$(CONFIG_DMA_ENGINE) += of-dma.o
3636diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
3637index a815d44..e2c2230 100644
3638--- a/drivers/dma/dmaengine.c
3639+++ b/drivers/dma/dmaengine.c
3640@@ -501,7 +501,7 @@ static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_devic
3641 * @fn: optional callback to disposition available channels
3642 * @fn_param: opaque parameter to pass to dma_filter_fn
3643 */
3644-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
3645+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
3646 {
3647 struct dma_device *device, *_d;
3648 struct dma_chan *chan = NULL;
3649diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
3650new file mode 100644
3651index 0000000..7aa0864
3652--- /dev/null
3653+++ b/drivers/dma/of-dma.c
3654@@ -0,0 +1,231 @@
3655+/*
3656+ * Device tree helpers for DMA request / controller
3657+ *
3658+ * Based on of_gpio.c
3659+ *
3660+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
3661+ *
3662+ * This program is free software; you can redistribute it and/or modify
3663+ * it under the terms of the GNU General Public License version 2 as
3664+ * published by the Free Software Foundation.
3665+ */
3666+
3667+#include <linux/device.h>
3668+#include <linux/err.h>
3669+#include <linux/module.h>
3670+#include <linux/mutex.h>
3671+#include <linux/slab.h>
3672+#include <linux/of.h>
3673+#include <linux/of_dma.h>
3674+
3675+static LIST_HEAD(of_dma_list);
3676+static DEFINE_MUTEX(of_dma_lock);
3677+
3678+/**
3679+ * of_dma_find_controller - Get a DMA controller in DT DMA helpers list
3680+ * @dma_spec: pointer to DMA specifier as found in the device tree
3681+ *
3682+ * Finds a DMA controller with matching device node and number for dma cells
3683+ * in a list of registered DMA controllers. If a match is found a valid pointer
3684+ * to the DMA data stored is returned. A NULL pointer is returned if no match is
3685+ * found.
3686+ */
3687+static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
3688+{
3689+ struct of_dma *ofdma;
3690+
3691+ list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
3692+ if ((ofdma->of_node == dma_spec->np) &&
3693+ (ofdma->of_dma_nbcells == dma_spec->args_count))
3694+ return ofdma;
3695+
3696+ pr_debug("%s: can't find DMA controller %s\n", __func__,
3697+ dma_spec->np->full_name);
3698+
3699+ return NULL;
3700+}
3701+
3702+/**
3703+ * of_dma_controller_register - Register a DMA controller to DT DMA helpers
3704+ * @np: device node of DMA controller
3705+ * @of_dma_xlate: translation function which converts a phandle
3706+ * arguments list into a dma_chan structure
3707+ * @data: pointer to controller specific data to be used by
3708+ * translation function
3709+ *
3710+ * Returns 0 on success or appropriate errno value on error.
3711+ *
3712+ * Allocated memory should be freed with appropriate of_dma_controller_free()
3713+ * call.
3714+ */
3715+int of_dma_controller_register(struct device_node *np,
3716+ struct dma_chan *(*of_dma_xlate)
3717+ (struct of_phandle_args *, struct of_dma *),
3718+ void *data)
3719+{
3720+ struct of_dma *ofdma;
3721+ int nbcells;
3722+ const __be32 *prop;
3723+
3724+ if (!np || !of_dma_xlate) {
3725+ pr_err("%s: not enough information provided\n", __func__);
3726+ return -EINVAL;
3727+ }
3728+
3729+ ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
3730+ if (!ofdma)
3731+ return -ENOMEM;
3732+
3733+ prop = of_get_property(np, "#dma-cells", NULL);
3734+ if (prop)
3735+ nbcells = be32_to_cpup(prop);
3736+
3737+ if (!prop || !nbcells) {
3738+ pr_err("%s: #dma-cells property is missing or invalid\n",
3739+ __func__);
3740+ kfree(ofdma);
3741+ return -EINVAL;
3742+ }
3743+
3744+ ofdma->of_node = np;
3745+ ofdma->of_dma_nbcells = nbcells;
3746+ ofdma->of_dma_xlate = of_dma_xlate;
3747+ ofdma->of_dma_data = data;
3748+
3749+ /* Now queue of_dma controller structure in list */
3750+ mutex_lock(&of_dma_lock);
3751+ list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
3752+ mutex_unlock(&of_dma_lock);
3753+
3754+ return 0;
3755+}
3756+EXPORT_SYMBOL_GPL(of_dma_controller_register);
3757+
3758+/**
3759+ * of_dma_controller_free - Remove a DMA controller from DT DMA helpers list
3760+ * @np: device node of DMA controller
3761+ *
3762+ * Memory allocated by of_dma_controller_register() is freed here.
3763+ */
3764+void of_dma_controller_free(struct device_node *np)
3765+{
3766+ struct of_dma *ofdma;
3767+
3768+ mutex_lock(&of_dma_lock);
3769+
3770+ list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
3771+ if (ofdma->of_node == np) {
3772+ list_del(&ofdma->of_dma_controllers);
3773+ kfree(ofdma);
3774+ break;
3775+ }
3776+
3777+ mutex_unlock(&of_dma_lock);
3778+}
3779+EXPORT_SYMBOL_GPL(of_dma_controller_free);
3780+
3781+/**
3782+ * of_dma_match_channel - Check if a DMA specifier matches name
3783+ * @np: device node to look for DMA channels
3784+ * @name: channel name to be matched
3785+ * @index: index of DMA specifier in list of DMA specifiers
3786+ * @dma_spec: pointer to DMA specifier as found in the device tree
3787+ *
3788+ * Check if the DMA specifier pointed to by the index in a list of DMA
3789+ * specifiers, matches the name provided. Returns 0 if the name matches and
3790+ * a valid pointer to the DMA specifier is found. Otherwise returns -ENODEV.
3791+ */
3792+static int of_dma_match_channel(struct device_node *np, const char *name,
3793+ int index, struct of_phandle_args *dma_spec)
3794+{
3795+ const char *s;
3796+
3797+ if (of_property_read_string_index(np, "dma-names", index, &s))
3798+ return -ENODEV;
3799+
3800+ if (strcmp(name, s))
3801+ return -ENODEV;
3802+
3803+ if (of_parse_phandle_with_args(np, "dmas", "#dma-cells", index,
3804+ dma_spec))
3805+ return -ENODEV;
3806+
3807+ return 0;
3808+}
3809+
3810+/**
3811+ * of_dma_request_slave_channel - Get the DMA slave channel
3812+ * @np: device node to get DMA request from
3813+ * @name: name of desired channel
3814+ *
3815+ * Returns pointer to appropriate dma channel on success or NULL on error.
3816+ */
3817+struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
3818+ const char *name)
3819+{
3820+ struct of_phandle_args dma_spec;
3821+ struct of_dma *ofdma;
3822+ struct dma_chan *chan;
3823+ int count, i;
3824+
3825+ if (!np || !name) {
3826+ pr_err("%s: not enough information provided\n", __func__);
3827+ return NULL;
3828+ }
3829+
3830+ count = of_property_count_strings(np, "dma-names");
3831+ if (count < 0) {
3832+ pr_err("%s: dma-names property missing or empty\n", __func__);
3833+ return NULL;
3834+ }
3835+
3836+ for (i = 0; i < count; i++) {
3837+ if (of_dma_match_channel(np, name, i, &dma_spec))
3838+ continue;
3839+
3840+ mutex_lock(&of_dma_lock);
3841+ ofdma = of_dma_find_controller(&dma_spec);
3842+
3843+ if (ofdma)
3844+ chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
3845+ else
3846+ chan = NULL;
3847+
3848+ mutex_unlock(&of_dma_lock);
3849+
3850+ of_node_put(dma_spec.np);
3851+
3852+ if (chan)
3853+ return chan;
3854+ }
3855+
3856+ return NULL;
3857+}
3858+
3859+/**
3860+ * of_dma_simple_xlate - Simple DMA engine translation function
3861+ * @dma_spec: pointer to DMA specifier as found in the device tree
3862+ * @of_dma: pointer to DMA controller data
3863+ *
3864+ * A simple translation function for devices that use a 32-bit value for the
3865+ * filter_param when calling the DMA engine dma_request_channel() function.
3866+ * Note that this translation function requires that #dma-cells is equal to 1
3867+ * and the argument of the dma specifier is the 32-bit filter_param. Returns
3868+ * pointer to appropriate dma channel on success or NULL on error.
3869+ */
3870+struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
3871+ struct of_dma *ofdma)
3872+{
3873+ int count = dma_spec->args_count;
3874+ struct of_dma_filter_info *info = ofdma->of_dma_data;
3875+
3876+ if (!info || !info->filter_fn)
3877+ return NULL;
3878+
3879+ if (count != 1)
3880+ return NULL;
3881+
3882+ return dma_request_channel(info->dma_cap, info->filter_fn,
3883+ &dma_spec->args[0]);
3884+}
3885+EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
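
The new of-dma.c is essentially a small registry: controller drivers add an entry (device node, #dma-cells count, translate callback, private data) to a mutex-protected list, and clients resolve a "dmas"/"dma-names" reference by finding the matching entry and calling its translate callback. A compact user-space model of that registry pattern, with device nodes reduced to plain strings, no locking, and every name invented for the sketch:

#include <stdio.h>
#include <string.h>

struct dma_chan { const char *label; };

/* One registered controller: "node" id, cell count, translate callback. */
struct of_dma_entry {
        const char *node;
        int nbcells;
        struct dma_chan *(*xlate)(const int *args, int nargs, void *data);
        void *data;
        struct of_dma_entry *next;
};

static struct of_dma_entry *controllers;

static void register_controller(struct of_dma_entry *e)
{
        e->next = controllers;
        controllers = e;
}

/* Resolve a specifier (node + args) against the registered controllers. */
static struct dma_chan *request_channel(const char *node,
                                        const int *args, int nargs)
{
        struct of_dma_entry *e;

        for (e = controllers; e; e = e->next)
                if (!strcmp(e->node, node) && e->nbcells == nargs)
                        return e->xlate(args, nargs, e->data);
        return NULL;
}

/* A "simple" translation: one cell selecting a channel id. */
static struct dma_chan channels[] = { { "ch0" }, { "ch1" } };

static struct dma_chan *simple_xlate(const int *args, int nargs, void *data)
{
        (void)data;
        if (nargs != 1 || args[0] < 0 || args[0] > 1)
                return NULL;
        return &channels[args[0]];
}

int main(void)
{
        struct of_dma_entry dmac = { "dmac0", 1, simple_xlate, NULL, NULL };
        int spec[] = { 1 };
        struct dma_chan *chan;

        register_controller(&dmac);
        chan = request_channel("dmac0", spec, 1);
        printf("got %s\n", chan ? chan->label : "nothing");
        return 0;
}
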
3886diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
3887index 48d3fee..4c2f465 100644
3888--- a/drivers/dma/pl330.c
3889+++ b/drivers/dma/pl330.c
3890@@ -25,6 +25,8 @@
3891 #include <linux/amba/pl330.h>
3892 #include <linux/scatterlist.h>
3893 #include <linux/of.h>
3894+#include <linux/of_dma.h>
3895+#include <linux/err.h>
3896
3897 #include "dmaengine.h"
3898 #define PL330_MAX_CHAN 8
3899@@ -585,8 +587,6 @@ struct dma_pl330_dmac {
3900
3901 /* Peripheral channels connected to this DMAC */
3902 struct dma_pl330_chan *peripherals; /* keep at end */
3903-
3904- u32 dma_channels;
3905 };
3906
3907 struct dma_pl330_desc {
3908@@ -608,6 +608,11 @@ struct dma_pl330_desc {
3909 struct dma_pl330_chan *pchan;
3910 };
3911
3912+struct dma_pl330_filter_args {
3913+ struct dma_pl330_dmac *pdmac;
3914+ unsigned int chan_id;
3915+};
3916+
3917 static inline void _callback(struct pl330_req *r, enum pl330_op_err err)
3918 {
3919 if (r && r->xfer_cb)
3920@@ -2284,13 +2289,12 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
3921
3922 /* If already submitted */
3923 if (desc->status == BUSY)
3924- break;
3925+ continue;
3926
3927 ret = pl330_submit_req(pch->pl330_chid,
3928 &desc->req);
3929 if (!ret) {
3930 desc->status = BUSY;
3931- break;
3932 } else if (ret == -EAGAIN) {
3933 /* QFull or DMAC Dying */
3934 break;
3935@@ -2354,6 +2358,16 @@ static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
3936 tasklet_schedule(&pch->task);
3937 }
3938
3939+static bool pl330_dt_filter(struct dma_chan *chan, void *param)
3940+{
3941+ struct dma_pl330_filter_args *fargs = param;
3942+
3943+ if (chan->device != &fargs->pdmac->ddma)
3944+ return false;
3945+
3946+ return (chan->chan_id == fargs->chan_id);
3947+}
3948+
3949 bool pl330_filter(struct dma_chan *chan, void *param)
3950 {
3951 u8 *peri_id;
3952@@ -2361,25 +2375,35 @@ bool pl330_filter(struct dma_chan *chan, void *param)
3953 if (chan->device->dev->driver != &pl330_driver.drv)
3954 return false;
3955
3956-#ifdef CONFIG_OF
3957- if (chan->device->dev->of_node) {
3958- const __be32 *prop_value;
3959- phandle phandle;
3960- struct device_node *node;
3961-
3962- prop_value = ((struct property *)param)->value;
3963- phandle = be32_to_cpup(prop_value++);
3964- node = of_find_node_by_phandle(phandle);
3965- return ((chan->private == node) &&
3966- (chan->chan_id == be32_to_cpup(prop_value)));
3967- }
3968-#endif
3969-
3970 peri_id = chan->private;
3971 return *peri_id == (unsigned)param;
3972 }
3973 EXPORT_SYMBOL(pl330_filter);
3974
3975+static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
3976+ struct of_dma *ofdma)
3977+{
3978+ int count = dma_spec->args_count;
3979+ struct dma_pl330_dmac *pdmac = ofdma->of_dma_data;
3980+ struct dma_pl330_filter_args fargs;
3981+ dma_cap_mask_t cap;
3982+
3983+ if (!pdmac)
3984+ return NULL;
3985+
3986+ if (count != 1)
3987+ return NULL;
3988+
3989+ fargs.pdmac = pdmac;
3990+ fargs.chan_id = dma_spec->args[0];
3991+
3992+ dma_cap_zero(cap);
3993+ dma_cap_set(DMA_SLAVE, cap);
3994+ dma_cap_set(DMA_CYCLIC, cap);
3995+
3996+ return dma_request_channel(cap, pl330_dt_filter, &fargs);
3997+}
3998+
3999 static int pl330_alloc_chan_resources(struct dma_chan *chan)
4000 {
4001 struct dma_pl330_chan *pch = to_pchan(chan);
4002@@ -2461,10 +2485,10 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
4003 struct dma_pl330_chan *pch = to_pchan(chan);
4004 unsigned long flags;
4005
4006- spin_lock_irqsave(&pch->lock, flags);
4007-
4008 tasklet_kill(&pch->task);
4009
4010+ spin_lock_irqsave(&pch->lock, flags);
4011+
4012 pl330_release_channel(pch->pl330_chid);
4013 pch->pl330_chid = NULL;
4014
4015@@ -2503,6 +2527,10 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
4016 /* Assign cookies to all nodes */
4017 while (!list_empty(&last->node)) {
4018 desc = list_entry(last->node.next, struct dma_pl330_desc, node);
4019+ if (pch->cyclic) {
4020+ desc->txd.callback = last->txd.callback;
4021+ desc->txd.callback_param = last->txd.callback_param;
4022+ }
4023
4024 dma_cookie_assign(&desc->txd);
4025
4026@@ -2686,45 +2714,82 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
4027 size_t period_len, enum dma_transfer_direction direction,
4028 unsigned long flags, void *context)
4029 {
4030- struct dma_pl330_desc *desc;
4031+ struct dma_pl330_desc *desc = NULL, *first = NULL;
4032 struct dma_pl330_chan *pch = to_pchan(chan);
4033+ struct dma_pl330_dmac *pdmac = pch->dmac;
4034+ unsigned int i;
4035 dma_addr_t dst;
4036 dma_addr_t src;
4037
4038- desc = pl330_get_desc(pch);
4039- if (!desc) {
4040- dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
4041- __func__, __LINE__);
4042+ if (len % period_len != 0)
4043 return NULL;
4044- }
4045
4046- switch (direction) {
4047- case DMA_MEM_TO_DEV:
4048- desc->rqcfg.src_inc = 1;
4049- desc->rqcfg.dst_inc = 0;
4050- desc->req.rqtype = MEMTODEV;
4051- src = dma_addr;
4052- dst = pch->fifo_addr;
4053- break;
4054- case DMA_DEV_TO_MEM:
4055- desc->rqcfg.src_inc = 0;
4056- desc->rqcfg.dst_inc = 1;
4057- desc->req.rqtype = DEVTOMEM;
4058- src = pch->fifo_addr;
4059- dst = dma_addr;
4060- break;
4061- default:
4062+ if (!is_slave_direction(direction)) {
4063 dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n",
4064 __func__, __LINE__);
4065 return NULL;
4066 }
4067
4068- desc->rqcfg.brst_size = pch->burst_sz;
4069- desc->rqcfg.brst_len = 1;
4070+ for (i = 0; i < len / period_len; i++) {
4071+ desc = pl330_get_desc(pch);
4072+ if (!desc) {
4073+ dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
4074+ __func__, __LINE__);
4075+
4076+ if (!first)
4077+ return NULL;
4078
4079- pch->cyclic = true;
4080+ spin_lock_irqsave(&pdmac->pool_lock, flags);
4081+
4082+ while (!list_empty(&first->node)) {
4083+ desc = list_entry(first->node.next,
4084+ struct dma_pl330_desc, node);
4085+ list_move_tail(&desc->node, &pdmac->desc_pool);
4086+ }
4087+
4088+ list_move_tail(&first->node, &pdmac->desc_pool);
4089+
4090+ spin_unlock_irqrestore(&pdmac->pool_lock, flags);
4091+
4092+ return NULL;
4093+ }
4094+
4095+ switch (direction) {
4096+ case DMA_MEM_TO_DEV:
4097+ desc->rqcfg.src_inc = 1;
4098+ desc->rqcfg.dst_inc = 0;
4099+ desc->req.rqtype = MEMTODEV;
4100+ src = dma_addr;
4101+ dst = pch->fifo_addr;
4102+ break;
4103+ case DMA_DEV_TO_MEM:
4104+ desc->rqcfg.src_inc = 0;
4105+ desc->rqcfg.dst_inc = 1;
4106+ desc->req.rqtype = DEVTOMEM;
4107+ src = pch->fifo_addr;
4108+ dst = dma_addr;
4109+ break;
4110+ default:
4111+ break;
4112+ }
4113
4114- fill_px(&desc->px, dst, src, period_len);
4115+ desc->rqcfg.brst_size = pch->burst_sz;
4116+ desc->rqcfg.brst_len = 1;
4117+ fill_px(&desc->px, dst, src, period_len);
4118+
4119+ if (!first)
4120+ first = desc;
4121+ else
4122+ list_add_tail(&desc->node, &first->node);
4123+
4124+ dma_addr += period_len;
4125+ }
4126+
4127+ if (!desc)
4128+ return NULL;
4129+
4130+ pch->cyclic = true;
4131+ desc->txd.flags = flags;
4132
4133 return &desc->txd;
4134 }
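The reworked pl330_prep_dma_cyclic() above allocates one descriptor per period and rejects lengths that are not a multiple of period_len. A hedged client-side sketch (function and buffer names are hypothetical; the DEV_TO_MEM direction and the four-period buffer are arbitrary choices), going through the device_prep_dma_cyclic hook whose signature appears in this hunk:

#include <linux/dmaengine.h>
#include <linux/errno.h>

/* Illustrative: buf must already be a DMA address covering 4 * period_len. */
static int example_start_cyclic(struct dma_chan *chan, dma_addr_t buf,
				size_t period_len,
				void (*cb)(void *), void *cb_arg)
{
	struct dma_async_tx_descriptor *desc;

	desc = chan->device->device_prep_dma_cyclic(chan, buf,
						    4 * period_len, period_len,
						    DMA_DEV_TO_MEM,
						    DMA_PREP_INTERRUPT, NULL);
	if (!desc)
		return -EBUSY;

	/* With this patch the callback is propagated to every period
	 * descriptor at submit time. */
	desc->callback = cb;
	desc->callback_param = cb_arg;

	dmaengine_submit(desc);
	dma_async_issue_pending(chan);

	return 0;
}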
4135@@ -2858,7 +2923,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4136 {
4137 struct dma_pl330_platdata *pdat;
4138 struct dma_pl330_dmac *pdmac;
4139- struct dma_pl330_chan *pch;
4140+ struct dma_pl330_chan *pch, *_p;
4141 struct pl330_info *pi;
4142 struct dma_device *pd;
4143 struct resource *res;
4144@@ -2868,7 +2933,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4145 pdat = adev->dev.platform_data;
4146
4147 /* Allocate a new DMAC and its Channels */
4148- pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL);
4149+ pdmac = devm_kzalloc(&adev->dev, sizeof(*pdmac), GFP_KERNEL);
4150 if (!pdmac) {
4151 dev_err(&adev->dev, "unable to allocate mem\n");
4152 return -ENOMEM;
4153@@ -2880,31 +2945,21 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4154 pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
4155
4156 res = &adev->res;
4157- request_mem_region(res->start, resource_size(res), "dma-pl330");
4158-
4159- pi->base = ioremap(res->start, resource_size(res));
4160- if (!pi->base) {
4161- ret = -ENXIO;
4162- goto probe_err1;
4163- }
4164+ pi->base = devm_ioremap_resource(&adev->dev, res);
4165+ if (IS_ERR(pi->base))
4166+ return PTR_ERR(pi->base);
4167
4168 amba_set_drvdata(adev, pdmac);
4169
4170- of_property_read_u32(adev->dev.of_node,
4171- "#dma-channels", &pdmac->dma_channels);
4172-
4173- /* irq 0 is abort IRQ */
4174- for (i = 1; i <= pdmac->dma_channels ; i++) {
4175- irq = adev->irq[i];
4176- ret = request_irq(irq, pl330_irq_handler, 0,
4177- dev_name(&adev->dev), pi);
4178- if (ret)
4179- goto probe_err3;
4180- }
4181+ irq = adev->irq[0];
4182+ ret = request_irq(irq, pl330_irq_handler, 0,
4183+ dev_name(&adev->dev), pi);
4184+ if (ret)
4185+ return ret;
4186
4187 ret = pl330_add(pi);
4188 if (ret)
4189- goto probe_err3;
4190+ goto probe_err1;
4191
4192 INIT_LIST_HEAD(&pdmac->desc_pool);
4193 spin_lock_init(&pdmac->pool_lock);
4194@@ -2926,7 +2981,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4195 if (!pdmac->peripherals) {
4196 ret = -ENOMEM;
4197 dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n");
4198- goto probe_err4;
4199+ goto probe_err2;
4200 }
4201
4202 for (i = 0; i < num_chan; i++) {
4203@@ -2970,7 +3025,16 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4204 ret = dma_async_device_register(pd);
4205 if (ret) {
4206 dev_err(&adev->dev, "unable to register DMAC\n");
4207- goto probe_err4;
4208+ goto probe_err3;
4209+ }
4210+
4211+ if (adev->dev.of_node) {
4212+ ret = of_dma_controller_register(adev->dev.of_node,
4213+ of_dma_pl330_xlate, pdmac);
4214+ if (ret) {
4215+ dev_err(&adev->dev,
4216+ "unable to register DMA to the generic DT DMA helpers\n");
4217+ }
4218 }
4219
4220 dev_info(&adev->dev,
4221@@ -2982,17 +3046,24 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
4222 pi->pcfg.num_peri, pi->pcfg.num_events);
4223
4224 return 0;
4225-
4226-probe_err4:
4227- pl330_del(pi);
4228 probe_err3:
4229- for (i = 1; i <= pdmac->dma_channels ; i++)
4230- free_irq(adev->irq[i], pi);
4231+ amba_set_drvdata(adev, NULL);
4232+
4233+ /* Idle the DMAC */
4234+ list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
4235+ chan.device_node) {
4236+
4237+ /* Remove the channel */
4238+ list_del(&pch->chan.device_node);
4239+
4240+ /* Flush the channel */
4241+ pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0);
4242+ pl330_free_chan_resources(&pch->chan);
4243+ }
4244 probe_err2:
4245- iounmap(pi->base);
4246+ pl330_del(pi);
4247 probe_err1:
4248- release_mem_region(res->start, resource_size(res));
4249- kfree(pdmac);
4250+ free_irq(irq, pi);
4251
4252 return ret;
4253 }
4254@@ -3002,12 +3073,15 @@ static int pl330_remove(struct amba_device *adev)
4255 struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
4256 struct dma_pl330_chan *pch, *_p;
4257 struct pl330_info *pi;
4258- struct resource *res;
4259- int i;
4260+ int irq;
4261
4262 if (!pdmac)
4263 return 0;
4264
4265+ if (adev->dev.of_node)
4266+ of_dma_controller_free(adev->dev.of_node);
4267+
4268+ dma_async_device_unregister(&pdmac->ddma);
4269 amba_set_drvdata(adev, NULL);
4270
4271 /* Idle the DMAC */
4272@@ -3026,15 +3100,8 @@ static int pl330_remove(struct amba_device *adev)
4273
4274 pl330_del(pi);
4275
4276- for (i = 1; i <= pdmac->dma_channels ; i++)
4277- free_irq(adev->irq[i], pi);
4278-
4279- iounmap(pi->base);
4280-
4281- res = &adev->res;
4282- release_mem_region(res->start, resource_size(res));
4283-
4284- kfree(pdmac);
4285+ irq = adev->irq[0];
4286+ free_irq(irq, pi);
4287
4288 return 0;
4289 }
4290diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
4291index de45b60..9b51712 100644
4292--- a/drivers/gpu/drm/i915/i915_gem.c
4293+++ b/drivers/gpu/drm/i915/i915_gem.c
4294@@ -91,7 +91,6 @@ i915_gem_wait_for_error(struct drm_device *dev)
4295 {
4296 struct drm_i915_private *dev_priv = dev->dev_private;
4297 struct completion *x = &dev_priv->error_completion;
4298- unsigned long flags;
4299 int ret;
4300
4301 if (!atomic_read(&dev_priv->mm.wedged))
4302@@ -116,9 +115,7 @@ i915_gem_wait_for_error(struct drm_device *dev)
4303 * end up waiting upon a subsequent completion event that
4304 * will never happen.
4305 */
4306- spin_lock_irqsave(&x->wait.lock, flags);
4307- x->done++;
4308- spin_unlock_irqrestore(&x->wait.lock, flags);
4309+ complete(x);
4310 }
4311 return 0;
4312 }
4313@@ -946,12 +943,9 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv,
4314 if (atomic_read(&dev_priv->mm.wedged)) {
4315 struct completion *x = &dev_priv->error_completion;
4316 bool recovery_complete;
4317- unsigned long flags;
4318
4319 /* Give the error handler a chance to run. */
4320- spin_lock_irqsave(&x->wait.lock, flags);
4321- recovery_complete = x->done > 0;
4322- spin_unlock_irqrestore(&x->wait.lock, flags);
4323+ recovery_complete = completion_done(x);
4324
4325 /* Non-interruptible callers can't handle -EAGAIN, hence return
4326 * -EIO unconditionally for these. */
4327@@ -4366,7 +4360,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4328 if (!mutex_is_locked(mutex))
4329 return false;
4330
4331-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
4332+#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE)
4333 return mutex->owner == task;
4334 #else
4335 /* Since UP may be pre-empted, we cannot assume that we own the lock */
4336diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
4337index ba8805a..eabd3dd 100644
4338--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
4339+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
4340@@ -814,6 +814,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
4341 struct intel_ring_buffer *ring;
4342 u32 ctx_id = i915_execbuffer2_get_context_id(*args);
4343 u32 exec_start, exec_len;
4344+ u32 seqno;
4345 u32 mask;
4346 u32 flags;
4347 int ret, mode, i;
4348@@ -1068,7 +1069,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
4349 goto err;
4350 }
4351
4352- trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
4353+ seqno = intel_ring_get_seqno(ring);
4354+ trace_i915_gem_ring_dispatch(ring, seqno, flags);
4355+ i915_trace_irq_get(ring, seqno);
4356
4357 i915_gem_execbuffer_move_to_active(&objects, ring);
4358 i915_gem_execbuffer_retire_commands(dev, file, ring);
4359diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
4360index 3db4a68..29217db 100644
4361--- a/drivers/gpu/drm/i915/i915_trace.h
4362+++ b/drivers/gpu/drm/i915/i915_trace.h
4363@@ -244,7 +244,6 @@ TRACE_EVENT(i915_gem_ring_dispatch,
4364 __entry->ring = ring->id;
4365 __entry->seqno = seqno;
4366 __entry->flags = flags;
4367- i915_trace_irq_get(ring, seqno);
4368 ),
4369
4370 TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
4371diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
4372index 6af87cd..8b5e4ae 100644
4373--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
4374+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
4375@@ -232,8 +232,10 @@ static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring)
4376
4377 static inline void i915_trace_irq_get(struct intel_ring_buffer *ring, u32 seqno)
4378 {
4379+#ifdef CONFIG_TRACEPOINTS
4380 if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
4381 ring->trace_irq_seqno = seqno;
4382+#endif
4383 }
4384
4385 /* DRI warts */
4386diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
4387index 4cc2f05..bb51488 100644
4388--- a/drivers/i2c/busses/i2c-omap.c
4389+++ b/drivers/i2c/busses/i2c-omap.c
4390@@ -881,15 +881,12 @@ omap_i2c_isr(int irq, void *dev_id)
4391 u16 mask;
4392 u16 stat;
4393
4394- spin_lock(&dev->lock);
4395- mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
4396 stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
4397+ mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
4398
4399 if (stat & mask)
4400 ret = IRQ_WAKE_THREAD;
4401
4402- spin_unlock(&dev->lock);
4403-
4404 return ret;
4405 }
4406
4407diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
4408index 36f76e2..394f142f 100644
4409--- a/drivers/ide/alim15x3.c
4410+++ b/drivers/ide/alim15x3.c
4411@@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev)
4412
4413 isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
4414
4415- local_irq_save(flags);
4416+ local_irq_save_nort(flags);
4417
4418 if (m5229_revision < 0xC2) {
4419 /*
4420@@ -325,7 +325,7 @@ out:
4421 }
4422 pci_dev_put(north);
4423 pci_dev_put(isa_dev);
4424- local_irq_restore(flags);
4425+ local_irq_restore_nort(flags);
4426 return 0;
4427 }
4428
4429diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
4430index 696b6c1..0d0a966 100644
4431--- a/drivers/ide/hpt366.c
4432+++ b/drivers/ide/hpt366.c
4433@@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
4434
4435 dma_old = inb(base + 2);
4436
4437- local_irq_save(flags);
4438+ local_irq_save_nort(flags);
4439
4440 dma_new = dma_old;
4441 pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
4442@@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif,
4443 if (dma_new != dma_old)
4444 outb(dma_new, base + 2);
4445
4446- local_irq_restore(flags);
4447+ local_irq_restore_nort(flags);
4448
4449 printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
4450 hwif->name, base, base + 7);
4451diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c
4452index 1976397..4169433 100644
4453--- a/drivers/ide/ide-io-std.c
4454+++ b/drivers/ide/ide-io-std.c
4455@@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
4456 unsigned long uninitialized_var(flags);
4457
4458 if ((io_32bit & 2) && !mmio) {
4459- local_irq_save(flags);
4460+ local_irq_save_nort(flags);
4461 ata_vlb_sync(io_ports->nsect_addr);
4462 }
4463
4464@@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
4465 insl(data_addr, buf, words);
4466
4467 if ((io_32bit & 2) && !mmio)
4468- local_irq_restore(flags);
4469+ local_irq_restore_nort(flags);
4470
4471 if (((len + 1) & 3) < 2)
4472 return;
4473@@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
4474 unsigned long uninitialized_var(flags);
4475
4476 if ((io_32bit & 2) && !mmio) {
4477- local_irq_save(flags);
4478+ local_irq_save_nort(flags);
4479 ata_vlb_sync(io_ports->nsect_addr);
4480 }
4481
4482@@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
4483 outsl(data_addr, buf, words);
4484
4485 if ((io_32bit & 2) && !mmio)
4486- local_irq_restore(flags);
4487+ local_irq_restore_nort(flags);
4488
4489 if (((len + 1) & 3) < 2)
4490 return;
4491diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
4492index 177db6d..079ae6b 100644
4493--- a/drivers/ide/ide-io.c
4494+++ b/drivers/ide/ide-io.c
4495@@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data)
4496 /* disable_irq_nosync ?? */
4497 disable_irq(hwif->irq);
4498 /* local CPU only, as if we were handling an interrupt */
4499- local_irq_disable();
4500+ local_irq_disable_nort();
4501 if (hwif->polling) {
4502 startstop = handler(drive);
4503 } else if (drive_is_ready(drive)) {
4504diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
4505index 376f2dc..f014dd1 100644
4506--- a/drivers/ide/ide-iops.c
4507+++ b/drivers/ide/ide-iops.c
4508@@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
4509 if ((stat & ATA_BUSY) == 0)
4510 break;
4511
4512- local_irq_restore(flags);
4513+ local_irq_restore_nort(flags);
4514 *rstat = stat;
4515 return -EBUSY;
4516 }
4517 }
4518- local_irq_restore(flags);
4519+ local_irq_restore_nort(flags);
4520 }
4521 /*
4522 * Allow status to settle, then read it again.
4523diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
4524index 068cef0..38e69e1 100644
4525--- a/drivers/ide/ide-probe.c
4526+++ b/drivers/ide/ide-probe.c
4527@@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id)
4528 int bswap = 1;
4529
4530 /* local CPU only; some systems need this */
4531- local_irq_save(flags);
4532+ local_irq_save_nort(flags);
4533 /* read 512 bytes of id info */
4534 hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
4535- local_irq_restore(flags);
4536+ local_irq_restore_nort(flags);
4537
4538 drive->dev_flags |= IDE_DFLAG_ID_READ;
4539 #ifdef DEBUG
4540diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
4541index 729428e..3a9a1fc 100644
4542--- a/drivers/ide/ide-taskfile.c
4543+++ b/drivers/ide/ide-taskfile.c
4544@@ -251,7 +251,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
4545
4546 page_is_high = PageHighMem(page);
4547 if (page_is_high)
4548- local_irq_save(flags);
4549+ local_irq_save_nort(flags);
4550
4551 buf = kmap_atomic(page) + offset;
4552
4553@@ -272,7 +272,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
4554 kunmap_atomic(buf);
4555
4556 if (page_is_high)
4557- local_irq_restore(flags);
4558+ local_irq_restore_nort(flags);
4559
4560 len -= nr_bytes;
4561 }
4562@@ -415,7 +415,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
4563 }
4564
4565 if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
4566- local_irq_disable();
4567+ local_irq_disable_nort();
4568
4569 ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
4570
4571diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
4572index fa080eb..ffeebc7 100644
4573--- a/drivers/idle/i7300_idle.c
4574+++ b/drivers/idle/i7300_idle.c
4575@@ -75,7 +75,7 @@ static unsigned long past_skip;
4576
4577 static struct pci_dev *fbd_dev;
4578
4579-static spinlock_t i7300_idle_lock;
4580+static raw_spinlock_t i7300_idle_lock;
4581 static int i7300_idle_active;
4582
4583 static u8 i7300_idle_thrtctl_saved;
4584@@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
4585 idle_begin_time = ktime_get();
4586 }
4587
4588- spin_lock_irqsave(&i7300_idle_lock, flags);
4589+ raw_spin_lock_irqsave(&i7300_idle_lock, flags);
4590 if (val == IDLE_START) {
4591
4592 cpumask_set_cpu(smp_processor_id(), idle_cpumask);
4593@@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
4594 }
4595 }
4596 end:
4597- spin_unlock_irqrestore(&i7300_idle_lock, flags);
4598+ raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
4599 return 0;
4600 }
4601
4602@@ -548,7 +548,7 @@ struct debugfs_file_info {
4603
4604 static int __init i7300_idle_init(void)
4605 {
4606- spin_lock_init(&i7300_idle_lock);
4607+ raw_spin_lock_init(&i7300_idle_lock);
4608 total_us = 0;
4609
4610 if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
4611diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
4612index cecb98a..3800ef5 100644
4613--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
4614+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
4615@@ -783,7 +783,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
4616
4617 ipoib_mcast_stop_thread(dev, 0);
4618
4619- local_irq_save(flags);
4620+ local_irq_save_nort(flags);
4621 netif_addr_lock(dev);
4622 spin_lock(&priv->lock);
4623
4624@@ -865,7 +865,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
4625
4626 spin_unlock(&priv->lock);
4627 netif_addr_unlock(dev);
4628- local_irq_restore(flags);
4629+ local_irq_restore_nort(flags);
4630
4631 /* We have to cancel outside of the spinlock */
4632 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
4633diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
4634index da739d9..18fdafe 100644
4635--- a/drivers/input/gameport/gameport.c
4636+++ b/drivers/input/gameport/gameport.c
4637@@ -87,12 +87,12 @@ static int gameport_measure_speed(struct gameport *gameport)
4638 tx = 1 << 30;
4639
4640 for(i = 0; i < 50; i++) {
4641- local_irq_save(flags);
4642+ local_irq_save_nort(flags);
4643 GET_TIME(t1);
4644 for (t = 0; t < 50; t++) gameport_read(gameport);
4645 GET_TIME(t2);
4646 GET_TIME(t3);
4647- local_irq_restore(flags);
4648+ local_irq_restore_nort(flags);
4649 udelay(i * 10);
4650 if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
4651 }
4652@@ -111,11 +111,11 @@ static int gameport_measure_speed(struct gameport *gameport)
4653 tx = 1 << 30;
4654
4655 for(i = 0; i < 50; i++) {
4656- local_irq_save(flags);
4657+ local_irq_save_nort(flags);
4658 rdtscl(t1);
4659 for (t = 0; t < 50; t++) gameport_read(gameport);
4660 rdtscl(t2);
4661- local_irq_restore(flags);
4662+ local_irq_restore_nort(flags);
4663 udelay(i * 10);
4664 if (t2 - t1 < tx) tx = t2 - t1;
4665 }
4666diff --git a/drivers/md/dm.c b/drivers/md/dm.c
4667index 0d8f086..0e1699e 100644
4668--- a/drivers/md/dm.c
4669+++ b/drivers/md/dm.c
4670@@ -1677,14 +1677,14 @@ static void dm_request_fn(struct request_queue *q)
4671 if (map_request(ti, clone, md))
4672 goto requeued;
4673
4674- BUG_ON(!irqs_disabled());
4675+ BUG_ON_NONRT(!irqs_disabled());
4676 spin_lock(q->queue_lock);
4677 }
4678
4679 goto out;
4680
4681 requeued:
4682- BUG_ON(!irqs_disabled());
4683+ BUG_ON_NONRT(!irqs_disabled());
4684 spin_lock(q->queue_lock);
4685
4686 delay_and_out:
4687diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
4688index 94ce78e..0089cb1 100644
4689--- a/drivers/md/raid5.c
4690+++ b/drivers/md/raid5.c
4691@@ -1418,8 +1418,9 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
4692 struct raid5_percpu *percpu;
4693 unsigned long cpu;
4694
4695- cpu = get_cpu();
4696+ cpu = get_cpu_light();
4697 percpu = per_cpu_ptr(conf->percpu, cpu);
4698+ spin_lock(&percpu->lock);
4699 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
4700 ops_run_biofill(sh);
4701 overlap_clear++;
4702@@ -1471,7 +1472,8 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
4703 if (test_and_clear_bit(R5_Overlap, &dev->flags))
4704 wake_up(&sh->raid_conf->wait_for_overlap);
4705 }
4706- put_cpu();
4707+ spin_unlock(&percpu->lock);
4708+ put_cpu_light();
4709 }
4710
4711 #ifdef CONFIG_MULTICORE_RAID456
4712@@ -5139,6 +5141,7 @@ static int raid5_alloc_percpu(struct r5conf *conf)
4713 break;
4714 }
4715 per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
4716+ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
4717 }
4718 #ifdef CONFIG_HOTPLUG_CPU
4719 conf->cpu_notify.notifier_call = raid456_cpu_notify;
4720diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
4721index 050a334..8a57647 100644
4722--- a/drivers/md/raid5.h
4723+++ b/drivers/md/raid5.h
4724@@ -429,6 +429,7 @@ struct r5conf {
4725 int recovery_disabled;
4726 /* per cpu variables */
4727 struct raid5_percpu {
4728+ spinlock_t lock; /* Protection for -RT */
4729 struct page *spare_page; /* Used when checking P/Q in raid6 */
4730 void *scribble; /* space for constructing buffer
4731 * lists and performing address
4732diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
4733index e77d1d0..9407919 100644
4734--- a/drivers/misc/Kconfig
4735+++ b/drivers/misc/Kconfig
4736@@ -63,6 +63,7 @@ config ATMEL_PWM
4737 config ATMEL_TCLIB
4738 bool "Atmel AT32/AT91 Timer/Counter Library"
4739 depends on (AVR32 || ARCH_AT91)
4740+ default y if PREEMPT_RT_FULL
4741 help
4742 Select this if you want a library to allocate the Timer/Counter
4743 blocks found on many Atmel processors. This facilitates using
4744@@ -78,8 +79,7 @@ config ATMEL_TCB_CLKSRC
4745 are combined to make a single 32-bit timer.
4746
4747 When GENERIC_CLOCKEVENTS is defined, the third timer channel
4748- may be used as a clock event device supporting oneshot mode
4749- (delays of up to two seconds) based on the 32 KiHz clock.
4750+ may be used as a clock event device supporting oneshot mode.
4751
4752 config ATMEL_TCB_CLKSRC_BLOCK
4753 int
4754@@ -93,6 +93,14 @@ config ATMEL_TCB_CLKSRC_BLOCK
4755 TC can be used for other purposes, such as PWM generation and
4756 interval timing.
4757
4758+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
4759+ bool "TC Block use 32 KiHz clock"
4760+ depends on ATMEL_TCB_CLKSRC
4761+ default y if !PREEMPT_RT_FULL
4762+ help
4763+ Select this to use 32 KiHz base clock rate as TC block clock
4764+ source for clock events.
4765+
4766 config IBM_ASM
4767 tristate "Device driver for IBM RSA service processor"
4768 depends on X86 && PCI && INPUT
4769@@ -114,6 +122,35 @@ config IBM_ASM
4770 for information on the specific driver level and support statement
4771 for your IBM server.
4772
4773+config HWLAT_DETECTOR
4774+ tristate "Testing module to detect hardware-induced latencies"
4775+ depends on DEBUG_FS
4776+ depends on RING_BUFFER
4777+ default m
4778+ ---help---
4779+ A simple hardware latency detector. Use this module to detect
4780+ large latencies introduced by the behavior of the underlying
4781+ system firmware external to Linux. We do this using periodic
4782+ use of stop_machine to grab all available CPUs and measure
4783+ for unexplainable gaps in the CPU timestamp counter(s). By
4784+ default, the module is not enabled until the "enable" file
4785+ within the "hwlat_detector" debugfs directory is toggled.
4786+
4787+ This module is often used to detect SMI (System Management
4788+ Interrupts) on x86 systems, though is not x86 specific. To
4789+ this end, we default to using a sample window of 1 second,
4790+ during which we will sample for 0.5 seconds. If an SMI or
4791+ similar event occurs during that time, it is recorded
4792+ into an 8K samples global ring buffer until retrieved.
4793+
4794+ WARNING: This software should never be enabled (it can be built
4795+ but should not be turned on after it is loaded) in a production
4796+ environment where high latencies are a concern since the
4797+ sampling mechanism actually introduces latencies for
4798+ regular tasks while the CPU(s) are being held.
4799+
4800+ If unsure, say N
4801+
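As a rough illustration of the workflow this help text describes, a small user-space sketch might toggle the detector and read one sample; the /sys/kernel/debug mount point and the "enable"/"sample" file names are assumptions based on the help text and the driver comments below, not verified against this tree:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical paths; debugfs is assumed mounted at /sys/kernel/debug. */
#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector"

int main(void)
{
	char line[128];
	ssize_t n;
	int fd;

	/* Start sampling: equivalent of echo 1 > .../enable */
	fd = open(HWLAT_DIR "/enable", O_WRONLY);
	if (fd < 0) {
		perror("open enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {
		perror("write enable");
		close(fd);
		return 1;
	}
	close(fd);

	/* Block until a latency above the threshold is recorded. */
	fd = open(HWLAT_DIR "/sample", O_RDONLY);
	if (fd < 0) {
		perror("open sample");
		return 1;
	}
	n = read(fd, line, sizeof(line) - 1);
	if (n > 0) {
		line[n] = '\0';
		printf("hwlat sample: %s", line);	/* "sec.nsec<TAB>usecs" */
	}
	close(fd);
	return 0;
}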
4802 config PHANTOM
4803 tristate "Sensable PHANToM (PCI)"
4804 depends on PCI
4805diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
4806index 8ed0f8c..6a595eb 100644
4807--- a/drivers/misc/Makefile
4808+++ b/drivers/misc/Makefile
4809@@ -49,4 +49,5 @@ obj-y += carma/
4810 obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
4811 obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/
4812 obj-$(CONFIG_INTEL_MEI) += mei/
4813+obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
4814 obj-$(CONFIG_SI570) += si570.o
4815diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
4816new file mode 100644
4817index 0000000..b7b7c90
4818--- /dev/null
4819+++ b/drivers/misc/hwlat_detector.c
4820@@ -0,0 +1,1212 @@
4821+/*
4822+ * hwlat_detector.c - A simple Hardware Latency detector.
4823+ *
4824+ * Use this module to detect large system latencies induced by the behavior of
4825+ * certain underlying system hardware or firmware, independent of Linux itself.
4826+ * The code was developed originally to detect the presence of SMIs on Intel
4827+ * and AMD systems, although there is no dependency upon x86 herein.
4828+ *
4829+ * The classical example usage of this module is in detecting the presence of
4830+ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
4831+ * somewhat special form of hardware interrupt spawned from earlier CPU debug
4832+ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
4833+ * LPC (or other device) to generate a special interrupt under certain
4834+ * circumstances, for example, upon expiration of a special SMI timer device,
4835+ * due to certain external thermal readings, on certain I/O address accesses,
4836+ * and other situations. An SMI hits a special CPU pin, triggers a special
4837+ * SMI mode (complete with special memory map), and the OS is unaware.
4838+ *
4839+ * Although certain hardware-induced latencies are necessary (for example,
4840+ * a modern system often requires an SMI handler for correct thermal control
4841+ * and remote management) they can wreak havoc upon any OS-level performance
4842+ * guarantees toward low-latency, especially when the OS is not even made
4843+ * aware of the presence of these interrupts. For this reason, we need a
4844+ * somewhat brute force mechanism to detect these interrupts. In this case,
4845+ * we do it by hogging all of the CPU(s) for configurable timer intervals,
4846+ * sampling the built-in CPU timer, looking for discontiguous readings.
4847+ *
4848+ * WARNING: This implementation necessarily introduces latencies. Therefore,
4849+ * you should NEVER use this module in a production environment
4850+ * requiring any kind of low-latency performance guarantee(s).
4851+ *
4852+ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
4853+ *
4854+ * Includes useful feedback from Clark Williams <clark@redhat.com>
4855+ *
4856+ * This file is licensed under the terms of the GNU General Public
4857+ * License version 2. This program is licensed "as is" without any
4858+ * warranty of any kind, whether express or implied.
4859+ */
4860+
4861+#include <linux/module.h>
4862+#include <linux/init.h>
4863+#include <linux/ring_buffer.h>
4864+#include <linux/stop_machine.h>
4865+#include <linux/time.h>
4866+#include <linux/hrtimer.h>
4867+#include <linux/kthread.h>
4868+#include <linux/debugfs.h>
4869+#include <linux/seq_file.h>
4870+#include <linux/uaccess.h>
4871+#include <linux/version.h>
4872+#include <linux/delay.h>
4873+#include <linux/slab.h>
4874+
4875+#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
4876+#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
4877+#define U64STR_SIZE 22 /* 20 digits max */
4878+
4879+#define VERSION "1.0.0"
4880+#define BANNER "hwlat_detector: "
4881+#define DRVNAME "hwlat_detector"
4882+#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
4883+#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
4884+#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
4885+
4886+/* Module metadata */
4887+
4888+MODULE_LICENSE("GPL");
4889+MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
4890+MODULE_DESCRIPTION("A simple hardware latency detector");
4891+MODULE_VERSION(VERSION);
4892+
4893+/* Module parameters */
4894+
4895+static int debug;
4896+static int enabled;
4897+static int threshold;
4898+
4899+module_param(debug, int, 0); /* enable debug */
4900+module_param(enabled, int, 0); /* enable detector */
4901+module_param(threshold, int, 0); /* latency threshold */
4902+
4903+/* Buffering and sampling */
4904+
4905+static struct ring_buffer *ring_buffer; /* sample buffer */
4906+static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
4907+static unsigned long buf_size = BUF_SIZE_DEFAULT;
4908+static struct task_struct *kthread; /* sampling thread */
4909+
4910+/* DebugFS filesystem entries */
4911+
4912+static struct dentry *debug_dir; /* debugfs directory */
4913+static struct dentry *debug_max; /* maximum TSC delta */
4914+static struct dentry *debug_count; /* total detect count */
4915+static struct dentry *debug_sample_width; /* sample width us */
4916+static struct dentry *debug_sample_window; /* sample window us */
4917+static struct dentry *debug_sample; /* raw samples us */
4918+static struct dentry *debug_threshold; /* threshold us */
4919+static struct dentry *debug_enable; /* enable/disable */
4920+
4921+/* Individual samples and global state */
4922+
4923+struct sample; /* latency sample */
4924+struct data; /* Global state */
4925+
4926+/* Sampling functions */
4927+static int __buffer_add_sample(struct sample *sample);
4928+static struct sample *buffer_get_sample(struct sample *sample);
4929+static int get_sample(void *unused);
4930+
4931+/* Threading and state */
4932+static int kthread_fn(void *unused);
4933+static int start_kthread(void);
4934+static int stop_kthread(void);
4935+static void __reset_stats(void);
4936+static int init_stats(void);
4937+
4938+/* Debugfs interface */
4939+static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
4940+ size_t cnt, loff_t *ppos, const u64 *entry);
4941+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
4942+ size_t cnt, loff_t *ppos, u64 *entry);
4943+static int debug_sample_fopen(struct inode *inode, struct file *filp);
4944+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
4945+ size_t cnt, loff_t *ppos);
4946+static int debug_sample_release(struct inode *inode, struct file *filp);
4947+static int debug_enable_fopen(struct inode *inode, struct file *filp);
4948+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
4949+ size_t cnt, loff_t *ppos);
4950+static ssize_t debug_enable_fwrite(struct file *file,
4951+ const char __user *user_buffer,
4952+ size_t user_size, loff_t *offset);
4953+
4954+/* Initialization functions */
4955+static int init_debugfs(void);
4956+static void free_debugfs(void);
4957+static int detector_init(void);
4958+static void detector_exit(void);
4959+
4960+/* Individual latency samples are stored here when detected and packed into
4961+ * the ring_buffer circular buffer, where they are overwritten when
4962+ * more than buf_size/sizeof(sample) samples are received. */
4963+struct sample {
4964+ u64 seqnum; /* unique sequence */
4965+ u64 duration; /* ktime delta */
4966+ struct timespec timestamp; /* wall time */
4967+ unsigned long lost;
4968+};
4969+
4970+/* keep the global state somewhere. Mostly used under stop_machine. */
4971+static struct data {
4972+
4973+ struct mutex lock; /* protect changes */
4974+
4975+ u64 count; /* total since reset */
4976+ u64 max_sample; /* max hardware latency */
4977+ u64 threshold; /* sample threshold level */
4978+
4979+ u64 sample_window; /* total sampling window (on+off) */
4980+ u64 sample_width; /* active sampling portion of window */
4981+
4982+ atomic_t sample_open; /* whether the sample file is open */
4983+
4984+ wait_queue_head_t wq; /* waitqueue for new sample values */
4985+
4986+} data;
4987+
4988+/**
4989+ * __buffer_add_sample - add a new latency sample recording to the ring buffer
4990+ * @sample: The new latency sample value
4991+ *
4992+ * This receives a new latency sample and records it in a global ring buffer.
4993+ * No additional locking is used in this case - suited for stop_machine use.
4994+ */
4995+static int __buffer_add_sample(struct sample *sample)
4996+{
4997+ return ring_buffer_write(ring_buffer,
4998+ sizeof(struct sample), sample);
4999+}
5000+
5001+/**
5002+ * buffer_get_sample - remove a hardware latency sample from the ring buffer
5003+ * @sample: Pre-allocated storage for the sample
5004+ *
5005+ * This retrieves a hardware latency sample from the global circular buffer
5006+ */
5007+static struct sample *buffer_get_sample(struct sample *sample)
5008+{
5009+ struct ring_buffer_event *e = NULL;
5010+ struct sample *s = NULL;
5011+ unsigned int cpu = 0;
5012+
5013+ if (!sample)
5014+ return NULL;
5015+
5016+ mutex_lock(&ring_buffer_mutex);
5017+ for_each_online_cpu(cpu) {
5018+ e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
5019+ if (e)
5020+ break;
5021+ }
5022+
5023+ if (e) {
5024+ s = ring_buffer_event_data(e);
5025+ memcpy(sample, s, sizeof(struct sample));
5026+ } else
5027+ sample = NULL;
5028+ mutex_unlock(&ring_buffer_mutex);
5029+
5030+ return sample;
5031+}
5032+
5033+/**
5034+ * get_sample - sample the CPU TSC and look for likely hardware latencies
5035+ * @unused: This is not used but is a part of the stop_machine API
5036+ *
5037+ * Used to repeatedly capture the CPU TSC (or similar), looking for potential
5038+ * hardware-induced latency. Called under stop_machine, with data.lock held.
5039+ */
5040+static int get_sample(void *unused)
5041+{
5042+ ktime_t start, t1, t2;
5043+ s64 diff, total = 0;
5044+ u64 sample = 0;
5045+ int ret = 1;
5046+
5047+ start = ktime_get(); /* start timestamp */
5048+
5049+ do {
5050+
5051+ t1 = ktime_get(); /* we'll look for a discontinuity */
5052+ t2 = ktime_get();
5053+
5054+ total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
5055+ diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */
5056+
5057+ /* This shouldn't happen */
5058+ if (diff < 0) {
5059+ printk(KERN_ERR BANNER "time running backwards\n");
5060+ goto out;
5061+ }
5062+
5063+ if (diff > sample)
5064+ sample = diff; /* only want highest value */
5065+
5066+ } while (total <= data.sample_width);
5067+
5068+ /* If we exceed the threshold value, we have found a hardware latency */
5069+ if (sample > data.threshold) {
5070+ struct sample s;
5071+
5072+ data.count++;
5073+ s.seqnum = data.count;
5074+ s.duration = sample;
5075+ s.timestamp = CURRENT_TIME;
5076+ __buffer_add_sample(&s);
5077+
5078+ /* Keep a running maximum ever recorded hardware latency */
5079+ if (sample > data.max_sample)
5080+ data.max_sample = sample;
5081+ }
5082+
5083+ ret = 0;
5084+out:
5085+ return ret;
5086+}
5087+
5088+/*
5089+ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
5090+ * @unused: A required part of the kthread API.
5091+ *
5092+ * Used to periodically sample the CPU TSC via a call to get_sample. We
5093+ * use stop_machine, which does (intentionally) introduce latency since we
5094+ * need to ensure nothing else might be running (and thus pre-empting).
5095+ * Obviously this should never be used in production environments.
5096+ *
5097+ * stop_machine will schedule us typically only on CPU0 which is fine for
5098+ * almost every real-world hardware latency situation - but we might later
5099+ * generalize this if we find there are any actual systems with alternate
5100+ * SMI delivery or other non-CPU0 hardware latencies.
5101+ */
5102+static int kthread_fn(void *unused)
5103+{
5104+ int err = 0;
5105+ u64 interval = 0;
5106+
5107+ while (!kthread_should_stop()) {
5108+
5109+ mutex_lock(&data.lock);
5110+
5111+ err = stop_machine(get_sample, unused, 0);
5112+ if (err) {
5113+ /* Houston, we have a problem */
5114+ mutex_unlock(&data.lock);
5115+ goto err_out;
5116+ }
5117+
5118+ wake_up(&data.wq); /* wake up reader(s) */
5119+
5120+ interval = data.sample_window - data.sample_width;
5121+ do_div(interval, USEC_PER_MSEC); /* modifies interval value */
5122+
5123+ mutex_unlock(&data.lock);
5124+
5125+ if (msleep_interruptible(interval))
5126+ goto out;
5127+ }
5128+ goto out;
5129+err_out:
5130+ printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
5131+ enabled = 0;
5132+out:
5133+ return err;
5134+
5135+}
5136+
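With the default window and width defined earlier, the interval computation in kthread_fn() works out to a 500 ms sleep between sampling bursts:

/*
 * interval = (sample_window - sample_width) / USEC_PER_MSEC
 *          = (1000000 us - 500000 us) / 1000
 *          = 500 ms handed to msleep_interruptible()
 */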
5137+/**
5138+ * start_kthread - Kick off the hardware latency sampling/detector kthread
5139+ *
5140+ * This starts a kernel thread that will sit and sample the CPU timestamp
5141+ * counter (TSC or similar) and look for potential hardware latencies.
5142+ */
5143+static int start_kthread(void)
5144+{
5145+ kthread = kthread_run(kthread_fn, NULL,
5146+ DRVNAME);
5147+ if (IS_ERR(kthread)) {
5148+ printk(KERN_ERR BANNER "could not start sampling thread\n");
5149+ enabled = 0;
5150+ return -ENOMEM;
5151+ }
5152+
5153+ return 0;
5154+}
5155+
5156+/**
5157+ * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
5158+ *
5159+ * This kicks the running hardware latency sampling/detector kernel thread and
5160+ * tells it to stop sampling now. Use this on unload and at system shutdown.
5161+ */
5162+static int stop_kthread(void)
5163+{
5164+ int ret;
5165+
5166+ ret = kthread_stop(kthread);
5167+
5168+ return ret;
5169+}
5170+
5171+/**
5172+ * __reset_stats - Reset statistics for the hardware latency detector
5173+ *
5174+ * We use data to store various statistics and global state. We call this
5175+ * function in order to reset those when "enable" is toggled on or off, and
5176+ * also at initialization. Should be called with data.lock held.
5177+ */
5178+static void __reset_stats(void)
5179+{
5180+ data.count = 0;
5181+ data.max_sample = 0;
5182+ ring_buffer_reset(ring_buffer); /* flush out old sample entries */
5183+}
5184+
5185+/**
5186+ * init_stats - Setup global state statistics for the hardware latency detector
5187+ *
5188+ * We use data to store various statistics and global state. We also use
5189+ * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
5190+ * induced system latencies. This function initializes these structures and
5191+ * allocates the global ring buffer also.
5192+ */
5193+static int init_stats(void)
5194+{
5195+ int ret = -ENOMEM;
5196+
5197+ mutex_init(&data.lock);
5198+ init_waitqueue_head(&data.wq);
5199+ atomic_set(&data.sample_open, 0);
5200+
5201+ ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
5202+
5203+ if (WARN(!ring_buffer, KERN_ERR BANNER
5204+ "failed to allocate ring buffer!\n"))
5205+ goto out;
5206+
5207+ __reset_stats();
5208+ data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */
5209+ data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
5210+ data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
5211+
5212+ ret = 0;
5213+
5214+out:
5215+ return ret;
5216+
5217+}
5218+
5219+/*
5220+ * simple_data_read - Wrapper read function for global state debugfs entries
5221+ * @filp: The active open file structure for the debugfs "file"
5222+ * @ubuf: The userspace provided buffer to read value into
5223+ * @cnt: The maximum number of bytes to read
5224+ * @ppos: The current "file" position
5225+ * @entry: The entry to read from
5226+ *
5227+ * This function provides a generic read implementation for the global state
5228+ * "data" structure debugfs filesystem entries. It would be nice to use
5229+ * simple_attr_read directly, but we need to make sure that the data.lock
5230+ * spinlock is held during the actual read (even though we likely won't ever
5231+ * actually race here as the updater runs under a stop_machine context).
5232+ */
5233+static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
5234+ size_t cnt, loff_t *ppos, const u64 *entry)
5235+{
5236+ char buf[U64STR_SIZE];
5237+ u64 val = 0;
5238+ int len = 0;
5239+
5240+ memset(buf, 0, sizeof(buf));
5241+
5242+ if (!entry)
5243+ return -EFAULT;
5244+
5245+ mutex_lock(&data.lock);
5246+ val = *entry;
5247+ mutex_unlock(&data.lock);
5248+
5249+ len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
5250+
5251+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
5252+
5253+}
5254+
5255+/*
5256+ * simple_data_write - Wrapper write function for global state debugfs entries
5257+ * @filp: The active open file structure for the debugfs "file"
5258+ * @ubuf: The userspace provided buffer to write value from
5259+ * @cnt: The maximum number of bytes to write
5260+ * @ppos: The current "file" position
5261+ * @entry: The entry to write to
5262+ *
5263+ * This function provides a generic write implementation for the global state
5264+ * "data" structure debugfs filesystem entries. It would be nice to use
5265+ * simple_attr_write directly, but we need to make sure that the data.lock
5266+ * spinlock is held during the actual write (even though we likely won't ever
5267+ * actually race here as the updater runs under a stop_machine context).
5268+ */
5269+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
5270+ size_t cnt, loff_t *ppos, u64 *entry)
5271+{
5272+ char buf[U64STR_SIZE];
5273+ int csize = min(cnt, sizeof(buf));
5274+ u64 val = 0;
5275+ int err = 0;
5276+
5277+ memset(buf, '\0', sizeof(buf));
5278+ if (copy_from_user(buf, ubuf, csize))
5279+ return -EFAULT;
5280+
5281+ buf[U64STR_SIZE-1] = '\0'; /* just in case */
5282+ err = strict_strtoull(buf, 10, &val);
5283+ if (err)
5284+ return -EINVAL;
5285+
5286+ mutex_lock(&data.lock);
5287+ *entry = val;
5288+ mutex_unlock(&data.lock);
5289+
5290+ return csize;
5291+}
5292+
5293+/**
5294+ * debug_count_fopen - Open function for "count" debugfs entry
5295+ * @inode: The in-kernel inode representation of the debugfs "file"
5296+ * @filp: The active open file structure for the debugfs "file"
5297+ *
5298+ * This function provides an open implementation for the "count" debugfs
5299+ * interface to the hardware latency detector.
5300+ */
5301+static int debug_count_fopen(struct inode *inode, struct file *filp)
5302+{
5303+ return 0;
5304+}
5305+
5306+/**
5307+ * debug_count_fread - Read function for "count" debugfs entry
5308+ * @filp: The active open file structure for the debugfs "file"
5309+ * @ubuf: The userspace provided buffer to read value into
5310+ * @cnt: The maximum number of bytes to read
5311+ * @ppos: The current "file" position
5312+ *
5313+ * This function provides a read implementation for the "count" debugfs
5314+ * interface to the hardware latency detector. Can be used to read the
5315+ * number of latency readings exceeding the configured threshold since
5316+ * the detector was last reset (e.g. by writing a zero into "count").
5317+ */
5318+static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
5319+ size_t cnt, loff_t *ppos)
5320+{
5321+ return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
5322+}
5323+
5324+/**
5325+ * debug_count_fwrite - Write function for "count" debugfs entry
5326+ * @filp: The active open file structure for the debugfs "file"
5327+ * @ubuf: The user buffer that contains the value to write
5328+ * @cnt: The maximum number of bytes to write to "file"
5329+ * @ppos: The current position in the debugfs "file"
5330+ *
5331+ * This function provides a write implementation for the "count" debugfs
5332+ * interface to the hardware latency detector. Can be used to write a
5333+ * desired value, especially to zero the total count.
5334+ */
5335+static ssize_t debug_count_fwrite(struct file *filp,
5336+ const char __user *ubuf,
5337+ size_t cnt,
5338+ loff_t *ppos)
5339+{
5340+ return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
5341+}
5342+
5343+/**
5344+ * debug_enable_fopen - Dummy open function for "enable" debugfs interface
5345+ * @inode: The in-kernel inode representation of the debugfs "file"
5346+ * @filp: The active open file structure for the debugfs "file"
5347+ *
5348+ * This function provides an open implementation for the "enable" debugfs
5349+ * interface to the hardware latency detector.
5350+ */
5351+static int debug_enable_fopen(struct inode *inode, struct file *filp)
5352+{
5353+ return 0;
5354+}
5355+
5356+/**
5357+ * debug_enable_fread - Read function for "enable" debugfs interface
5358+ * @filp: The active open file structure for the debugfs "file"
5359+ * @ubuf: The userspace provided buffer to read value into
5360+ * @cnt: The maximum number of bytes to read
5361+ * @ppos: The current "file" position
5362+ *
5363+ * This function provides a read implementation for the "enable" debugfs
5364+ * interface to the hardware latency detector. Can be used to determine
5365+ * whether the detector is currently enabled ("0\n" or "1\n" returned).
5366+ */
5367+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
5368+ size_t cnt, loff_t *ppos)
5369+{
5370+ char buf[4];
5371+
5372+ if ((cnt < sizeof(buf)) || (*ppos))
5373+ return 0;
5374+
5375+ buf[0] = enabled ? '1' : '0';
5376+ buf[1] = '\n';
5377+ buf[2] = '\0';
5378+ if (copy_to_user(ubuf, buf, strlen(buf)))
5379+ return -EFAULT;
5380+ return *ppos = strlen(buf);
5381+}
5382+
5383+/**
5384+ * debug_enable_fwrite - Write function for "enable" debugfs interface
5385+ * @filp: The active open file structure for the debugfs "file"
5386+ * @ubuf: The user buffer that contains the value to write
5387+ * @cnt: The maximum number of bytes to write to "file"
5388+ * @ppos: The current position in the debugfs "file"
5389+ *
5390+ * This function provides a write implementation for the "enable" debugfs
5391+ * interface to the hardware latency detector. Can be used to enable or
5392+ * disable the detector, which will have the side-effect of possibly
5393+ * also resetting the global stats and kicking off the measuring
5394+ * kthread (on an enable) or the converse (upon a disable).
5395+ */
5396+static ssize_t debug_enable_fwrite(struct file *filp,
5397+ const char __user *ubuf,
5398+ size_t cnt,
5399+ loff_t *ppos)
5400+{
5401+ char buf[4];
5402+ int csize = min(cnt, sizeof(buf));
5403+ long val = 0;
5404+ int err = 0;
5405+
5406+ memset(buf, '\0', sizeof(buf));
5407+ if (copy_from_user(buf, ubuf, csize))
5408+ return -EFAULT;
5409+
5410+ buf[sizeof(buf)-1] = '\0'; /* just in case */
5411+ err = strict_strtoul(buf, 10, &val);
5412+ if (0 != err)
5413+ return -EINVAL;
5414+
5415+ if (val) {
5416+ if (enabled)
5417+ goto unlock;
5418+ enabled = 1;
5419+ __reset_stats();
5420+ if (start_kthread())
5421+ return -EFAULT;
5422+ } else {
5423+ if (!enabled)
5424+ goto unlock;
5425+ enabled = 0;
5426+ err = stop_kthread();
5427+ if (err) {
5428+ printk(KERN_ERR BANNER "cannot stop kthread\n");
5429+ return -EFAULT;
5430+ }
5431+ wake_up(&data.wq); /* reader(s) should return */
5432+ }
5433+unlock:
5434+ return csize;
5435+}
5436+
5437+/**
5438+ * debug_max_fopen - Open function for "max" debugfs entry
5439+ * @inode: The in-kernel inode representation of the debugfs "file"
5440+ * @filp: The active open file structure for the debugfs "file"
5441+ *
5442+ * This function provides an open implementation for the "max" debugfs
5443+ * interface to the hardware latency detector.
5444+ */
5445+static int debug_max_fopen(struct inode *inode, struct file *filp)
5446+{
5447+ return 0;
5448+}
5449+
5450+/**
5451+ * debug_max_fread - Read function for "max" debugfs entry
5452+ * @filp: The active open file structure for the debugfs "file"
5453+ * @ubuf: The userspace provided buffer to read value into
5454+ * @cnt: The maximum number of bytes to read
5455+ * @ppos: The current "file" position
5456+ *
5457+ * This function provides a read implementation for the "max" debugfs
5458+ * interface to the hardware latency detector. Can be used to determine
5459+ * the maximum latency value observed since it was last reset.
5460+ */
5461+static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
5462+ size_t cnt, loff_t *ppos)
5463+{
5464+ return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
5465+}
5466+
5467+/**
5468+ * debug_max_fwrite - Write function for "max" debugfs entry
5469+ * @filp: The active open file structure for the debugfs "file"
5470+ * @ubuf: The user buffer that contains the value to write
5471+ * @cnt: The maximum number of bytes to write to "file"
5472+ * @ppos: The current position in the debugfs "file"
5473+ *
5474+ * This function provides a write implementation for the "max" debugfs
5475+ * interface to the hardware latency detector. Can be used to reset the
5476+ * maximum or set it to some other desired value - if, then, subsequent
5477+ * measurements exceed this value, the maximum will be updated.
5478+ */
5479+static ssize_t debug_max_fwrite(struct file *filp,
5480+ const char __user *ubuf,
5481+ size_t cnt,
5482+ loff_t *ppos)
5483+{
5484+ return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
5485+}
5486+
5487+
5488+/**
5489+ * debug_sample_fopen - An open function for "sample" debugfs interface
5490+ * @inode: The in-kernel inode representation of this debugfs "file"
5491+ * @filp: The active open file structure for the debugfs "file"
5492+ *
5493+ * This function handles opening the "sample" file within the hardware
5494+ * latency detector debugfs directory interface. This file is used to read
5495+ * raw samples from the global ring_buffer and allows the user to see a
5496+ * running latency history. Can be opened blocking or non-blocking,
5497+ * affecting whether it behaves as a buffer read pipe, or does not.
5498+ * Implements simple locking to prevent multiple simultaneous use.
5499+ */
5500+static int debug_sample_fopen(struct inode *inode, struct file *filp)
5501+{
5502+ if (!atomic_add_unless(&data.sample_open, 1, 1))
5503+ return -EBUSY;
5504+ else
5505+ return 0;
5506+}
5507+
5508+/**
5509+ * debug_sample_fread - A read function for "sample" debugfs interface
5510+ * @filp: The active open file structure for the debugfs "file"
5511+ * @ubuf: The user buffer that will contain the samples read
5512+ * @cnt: The maximum bytes to read from the debugfs "file"
5513+ * @ppos: The current position in the debugfs "file"
5514+ *
5515+ * This function handles reading from the "sample" file within the hardware
5516+ * latency detector debugfs directory interface. This file is used to read
5517+ * raw samples from the global ring_buffer and allows the user to see a
5518+ * running latency history. By default this will block pending a new
5519+ * value written into the sample buffer, unless there are already a
5520+ * number of value(s) waiting in the buffer, or the sample file was
5521+ * previously opened in a non-blocking mode of operation.
5522+ */
5523+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
5524+ size_t cnt, loff_t *ppos)
5525+{
5526+ int len = 0;
5527+ char buf[64];
5528+ struct sample *sample = NULL;
5529+
5530+ if (!enabled)
5531+ return 0;
5532+
5533+ sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
5534+ if (!sample)
5535+ return -ENOMEM;
5536+
5537+ while (!buffer_get_sample(sample)) {
5538+
5539+ DEFINE_WAIT(wait);
5540+
5541+ if (filp->f_flags & O_NONBLOCK) {
5542+ len = -EAGAIN;
5543+ goto out;
5544+ }
5545+
5546+ prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
5547+ schedule();
5548+ finish_wait(&data.wq, &wait);
5549+
5550+ if (signal_pending(current)) {
5551+ len = -EINTR;
5552+ goto out;
5553+ }
5554+
5555+ if (!enabled) { /* enable was toggled */
5556+ len = 0;
5557+ goto out;
5558+ }
5559+ }
5560+
5561+ len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
5562+ sample->timestamp.tv_sec,
5563+ sample->timestamp.tv_nsec,
5564+ sample->duration);
5565+
5566+
5567+ /* handling partial reads is more trouble than it's worth */
5568+ if (len > cnt)
5569+ goto out;
5570+
5571+ if (copy_to_user(ubuf, buf, len))
5572+ len = -EFAULT;
5573+
5574+out:
5575+ kfree(sample);
5576+ return len;
5577+}
5578+
5579+/**
5580+ * debug_sample_release - Release function for "sample" debugfs interface
5581+ * @inode: The in-kernel inode representation of the debugfs "file"
5582+ * @filp: The active open file structure for the debugfs "file"
5583+ *
5584+ * This function completes the close of the debugfs interface "sample" file.
5585+ * Frees the sample_open "lock" so that other users may open the interface.
5586+ */
5587+static int debug_sample_release(struct inode *inode, struct file *filp)
5588+{
5589+ atomic_dec(&data.sample_open);
5590+
5591+ return 0;
5592+}
5593+
5594+/**
5595+ * debug_threshold_fopen - Open function for "threshold" debugfs entry
5596+ * @inode: The in-kernel inode representation of the debugfs "file"
5597+ * @filp: The active open file structure for the debugfs "file"
5598+ *
5599+ * This function provides an open implementation for the "threshold" debugfs
5600+ * interface to the hardware latency detector.
5601+ */
5602+static int debug_threshold_fopen(struct inode *inode, struct file *filp)
5603+{
5604+ return 0;
5605+}
5606+
5607+/**
5608+ * debug_threshold_fread - Read function for "threshold" debugfs entry
5609+ * @filp: The active open file structure for the debugfs "file"
5610+ * @ubuf: The userspace provided buffer to read value into
5611+ * @cnt: The maximum number of bytes to read
5612+ * @ppos: The current "file" position
5613+ *
5614+ * This function provides a read implementation for the "threshold" debugfs
5615+ * interface to the hardware latency detector. It can be used to determine
5616+ * the current threshold level at which a latency will be recorded in the
5617+ * global ring buffer, typically on the order of 10us.
5618+ */
5619+static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
5620+ size_t cnt, loff_t *ppos)
5621+{
5622+ return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
5623+}
5624+
5625+/**
5626+ * debug_threshold_fwrite - Write function for "threshold" debugfs entry
5627+ * @filp: The active open file structure for the debugfs "file"
5628+ * @ubuf: The user buffer that contains the value to write
5629+ * @cnt: The maximum number of bytes to write to "file"
5630+ * @ppos: The current position in the debugfs "file"
5631+ *
5632+ * This function provides a write implementation for the "threshold" debugfs
5633+ * interface to the hardware latency detector. It can be used to configure
5634+ * the threshold level at which any subsequently detected latencies will
5635+ * be recorded into the global ring buffer.
5636+ */
5637+static ssize_t debug_threshold_fwrite(struct file *filp,
5638+ const char __user *ubuf,
5639+ size_t cnt,
5640+ loff_t *ppos)
5641+{
5642+ int ret;
5643+
5644+ ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
5645+
5646+ if (enabled)
5647+ wake_up_process(kthread);
5648+
5649+ return ret;
5650+}
5651+
5652+/**
5653+ * debug_width_fopen - Open function for "width" debugfs entry
5654+ * @inode: The in-kernel inode representation of the debugfs "file"
5655+ * @filp: The active open file structure for the debugfs "file"
5656+ *
5657+ * This function provides an open implementation for the "width" debugfs
5658+ * interface to the hardware latency detector.
5659+ */
5660+static int debug_width_fopen(struct inode *inode, struct file *filp)
5661+{
5662+ return 0;
5663+}
5664+
5665+/**
5666+ * debug_width_fread - Read function for "width" debugfs entry
5667+ * @filp: The active open file structure for the debugfs "file"
5668+ * @ubuf: The userspace provided buffer to read value into
5669+ * @cnt: The maximum number of bytes to read
5670+ * @ppos: The current "file" position
5671+ *
5672+ * This function provides a read implementation for the "width" debugfs
5673+ * interface to the hardware latency detector. It can be used to determine
5674+ * for how many us of the total window we will actively sample for any
5675+ * hardware-induced latency periods. Obviously, it is not possible to
5676+ * sample constantly and have the system respond to a sample reader, or,
5677+ * worse, without having the system appear to have gone out to lunch.
5678+ */
5679+static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
5680+ size_t cnt, loff_t *ppos)
5681+{
5682+ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
5683+}
5684+
5685+/**
5686+ * debug_width_fwrite - Write function for "width" debugfs entry
5687+ * @filp: The active open file structure for the debugfs "file"
5688+ * @ubuf: The user buffer that contains the value to write
5689+ * @cnt: The maximum number of bytes to write to "file"
5690+ * @ppos: The current position in the debugfs "file"
5691+ *
5692+ * This function provides a write implementation for the "width" debugfs
5693+ * interface to the hardware latency detector. It can be used to configure
5694+ * for how many us of the total window we will actively sample for any
5695+ * hardware-induced latency periods. Obviously, it is not possible to
5696+ * sample constantly and have the system respond to a sample reader, or,
5697+ * worse, without having the system appear to have gone out to lunch. It
5698+ * is enforced that width is less than the total window size.
5699+ */
5700+static ssize_t debug_width_fwrite(struct file *filp,
5701+ const char __user *ubuf,
5702+ size_t cnt,
5703+ loff_t *ppos)
5704+{
5705+ char buf[U64STR_SIZE];
5706+ int csize = min(cnt, sizeof(buf));
5707+ u64 val = 0;
5708+ int err = 0;
5709+
5710+ memset(buf, '\0', sizeof(buf));
5711+ if (copy_from_user(buf, ubuf, csize))
5712+ return -EFAULT;
5713+
5714+ buf[U64STR_SIZE-1] = '\0'; /* just in case */
5715+ err = strict_strtoull(buf, 10, &val);
5716+ if (0 != err)
5717+ return -EINVAL;
5718+
5719+ mutex_lock(&data.lock);
5720+ if (val < data.sample_window)
5721+ data.sample_width = val;
5722+ else {
5723+ mutex_unlock(&data.lock);
5724+ return -EINVAL;
5725+ }
5726+ mutex_unlock(&data.lock);
5727+
5728+ if (enabled)
5729+ wake_up_process(kthread);
5730+
5731+ return csize;
5732+}
5733+
5734+/**
5735+ * debug_window_fopen - Open function for "window" debugfs entry
5736+ * @inode: The in-kernel inode representation of the debugfs "file"
5737+ * @filp: The active open file structure for the debugfs "file"
5738+ *
5739+ * This function provides an open implementation for the "window" debugfs
5740+ * interface to the hardware latency detector. The window is the total time
5741+ * in us that will be considered one sample period. Conceptually, windows
5742+ * occur back-to-back and contain a sample width period during which
5743+ * actual sampling occurs.
5744+ */
5745+static int debug_window_fopen(struct inode *inode, struct file *filp)
5746+{
5747+ return 0;
5748+}
5749+
5750+/**
5751+ * debug_window_fread - Read function for "window" debugfs entry
5752+ * @filp: The active open file structure for the debugfs "file"
5753+ * @ubuf: The userspace provided buffer to read value into
5754+ * @cnt: The maximum number of bytes to read
5755+ * @ppos: The current "file" position
5756+ *
5757+ * This function provides a read implementation for the "window" debugfs
5758+ * interface to the hardware latency detector. The window is the total time
5759+ * in us that will be considered one sample period. Conceptually, windows
5760+ * occur back-to-back and contain a sample width period during which
5761+ * actual sampling occurs. Can be used to read the total window size.
5762+ */
5763+static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
5764+ size_t cnt, loff_t *ppos)
5765+{
5766+ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
5767+}
5768+
5769+/**
5770+ * debug_window_fwrite - Write function for "window" debugfs entry
5771+ * @filp: The active open file structure for the debugfs "file"
5772+ * @ubuf: The user buffer that contains the value to write
5773+ * @cnt: The maximum number of bytes to write to "file"
5774+ * @ppos: The current position in the debugfs "file"
5775+ *
5776+ * This function provides a write implementation for the "window" debugfs
5777+ * interface to the hardware latency detector. The window is the total time
5778+ * in us that will be considered one sample period. Conceptually, windows
5779+ * occur back-to-back and contain a sample width period during which
5780+ * actual sampling occurs. Can be used to write a new total window size. It
5781+ * is enforced that any value written must be greater than the sample width
5782+ * size, or an error results.
5783+ */
5784+static ssize_t debug_window_fwrite(struct file *filp,
5785+ const char __user *ubuf,
5786+ size_t cnt,
5787+ loff_t *ppos)
5788+{
5789+ char buf[U64STR_SIZE];
5790+ int csize = min(cnt, sizeof(buf));
5791+ u64 val = 0;
5792+ int err = 0;
5793+
5794+ memset(buf, '\0', sizeof(buf));
5795+ if (copy_from_user(buf, ubuf, csize))
5796+ return -EFAULT;
5797+
5798+ buf[U64STR_SIZE-1] = '\0'; /* just in case */
5799+ err = strict_strtoull(buf, 10, &val);
5800+ if (0 != err)
5801+ return -EINVAL;
5802+
5803+ mutex_lock(&data.lock);
5804+ if (data.sample_width < val)
5805+ data.sample_window = val;
5806+ else {
5807+ mutex_unlock(&data.lock);
5808+ return -EINVAL;
5809+ }
5810+ mutex_unlock(&data.lock);
5811+
5812+ return csize;
5813+}
5814+
5815+/*
5816+ * Function pointers for the "count" debugfs file operations
5817+ */
5818+static const struct file_operations count_fops = {
5819+ .open = debug_count_fopen,
5820+ .read = debug_count_fread,
5821+ .write = debug_count_fwrite,
5822+ .owner = THIS_MODULE,
5823+};
5824+
5825+/*
5826+ * Function pointers for the "enable" debugfs file operations
5827+ */
5828+static const struct file_operations enable_fops = {
5829+ .open = debug_enable_fopen,
5830+ .read = debug_enable_fread,
5831+ .write = debug_enable_fwrite,
5832+ .owner = THIS_MODULE,
5833+};
5834+
5835+/*
5836+ * Function pointers for the "max" debugfs file operations
5837+ */
5838+static const struct file_operations max_fops = {
5839+ .open = debug_max_fopen,
5840+ .read = debug_max_fread,
5841+ .write = debug_max_fwrite,
5842+ .owner = THIS_MODULE,
5843+};
5844+
5845+/*
5846+ * Function pointers for the "sample" debugfs file operations
5847+ */
5848+static const struct file_operations sample_fops = {
5849+ .open = debug_sample_fopen,
5850+ .read = debug_sample_fread,
5851+ .release = debug_sample_release,
5852+ .owner = THIS_MODULE,
5853+};
5854+
5855+/*
5856+ * Function pointers for the "threshold" debugfs file operations
5857+ */
5858+static const struct file_operations threshold_fops = {
5859+ .open = debug_threshold_fopen,
5860+ .read = debug_threshold_fread,
5861+ .write = debug_threshold_fwrite,
5862+ .owner = THIS_MODULE,
5863+};
5864+
5865+/*
5866+ * Function pointers for the "width" debugfs file operations
5867+ */
5868+static const struct file_operations width_fops = {
5869+ .open = debug_width_fopen,
5870+ .read = debug_width_fread,
5871+ .write = debug_width_fwrite,
5872+ .owner = THIS_MODULE,
5873+};
5874+
5875+/*
5876+ * Function pointers for the "window" debugfs file operations
5877+ */
5878+static const struct file_operations window_fops = {
5879+ .open = debug_window_fopen,
5880+ .read = debug_window_fread,
5881+ .write = debug_window_fwrite,
5882+ .owner = THIS_MODULE,
5883+};
5884+
5885+/**
5886+ * init_debugfs - A function to initialize the debugfs interface files
5887+ *
5888+ * This function creates entries in debugfs for "hwlat_detector", including
5889+ * files to read values from the detector, current samples, and the
5890+ * maximum sample that has been captured since the hardware latency
5891+ * detector was started.
5892+ */
5893+static int init_debugfs(void)
5894+{
5895+ int ret = -ENOMEM;
5896+
5897+ debug_dir = debugfs_create_dir(DRVNAME, NULL);
5898+ if (!debug_dir)
5899+ goto err_debug_dir;
5900+
5901+ debug_sample = debugfs_create_file("sample", 0444,
5902+ debug_dir, NULL,
5903+ &sample_fops);
5904+ if (!debug_sample)
5905+ goto err_sample;
5906+
5907+ debug_count = debugfs_create_file("count", 0444,
5908+ debug_dir, NULL,
5909+ &count_fops);
5910+ if (!debug_count)
5911+ goto err_count;
5912+
5913+ debug_max = debugfs_create_file("max", 0444,
5914+ debug_dir, NULL,
5915+ &max_fops);
5916+ if (!debug_max)
5917+ goto err_max;
5918+
5919+ debug_sample_window = debugfs_create_file("window", 0644,
5920+ debug_dir, NULL,
5921+ &window_fops);
5922+ if (!debug_sample_window)
5923+ goto err_window;
5924+
5925+ debug_sample_width = debugfs_create_file("width", 0644,
5926+ debug_dir, NULL,
5927+ &width_fops);
5928+ if (!debug_sample_width)
5929+ goto err_width;
5930+
5931+ debug_threshold = debugfs_create_file("threshold", 0644,
5932+ debug_dir, NULL,
5933+ &threshold_fops);
5934+ if (!debug_threshold)
5935+ goto err_threshold;
5936+
5937+ debug_enable = debugfs_create_file("enable", 0644,
5938+ debug_dir, &enabled,
5939+ &enable_fops);
5940+ if (!debug_enable)
5941+ goto err_enable;
5942+
5943+ else {
5944+ ret = 0;
5945+ goto out;
5946+ }
5947+
5948+err_enable:
5949+ debugfs_remove(debug_threshold);
5950+err_threshold:
5951+ debugfs_remove(debug_sample_width);
5952+err_width:
5953+ debugfs_remove(debug_sample_window);
5954+err_window:
5955+ debugfs_remove(debug_max);
5956+err_max:
5957+ debugfs_remove(debug_count);
5958+err_count:
5959+ debugfs_remove(debug_sample);
5960+err_sample:
5961+ debugfs_remove(debug_dir);
5962+err_debug_dir:
5963+out:
5964+ return ret;
5965+}
5966+
5967+/**
5968+ * free_debugfs - A function to cleanup the debugfs file interface
5969+ */
5970+static void free_debugfs(void)
5971+{
5972+ /* could also use a debugfs_remove_recursive */
5973+ debugfs_remove(debug_enable);
5974+ debugfs_remove(debug_threshold);
5975+ debugfs_remove(debug_sample_width);
5976+ debugfs_remove(debug_sample_window);
5977+ debugfs_remove(debug_max);
5978+ debugfs_remove(debug_count);
5979+ debugfs_remove(debug_sample);
5980+ debugfs_remove(debug_dir);
5981+}
5982+
5983+/**
5984+ * detector_init - Standard module initialization code
5985+ */
5986+static int detector_init(void)
5987+{
5988+ int ret = -ENOMEM;
5989+
5990+ printk(KERN_INFO BANNER "version %s\n", VERSION);
5991+
5992+ ret = init_stats();
5993+ if (0 != ret)
5994+ goto out;
5995+
5996+ ret = init_debugfs();
5997+ if (0 != ret)
5998+ goto err_stats;
5999+
6000+ if (enabled)
6001+ ret = start_kthread();
6002+
6003+ goto out;
6004+
6005+err_stats:
6006+ ring_buffer_free(ring_buffer);
6007+out:
6008+ return ret;
6009+
6010+}
6011+
6012+/**
6013+ * detector_exit - Standard module cleanup code
6014+ */
6015+static void detector_exit(void)
6016+{
6017+ int err;
6018+
6019+ if (enabled) {
6020+ enabled = 0;
6021+ err = stop_kthread();
6022+ if (err)
6023+ printk(KERN_ERR BANNER "cannot stop kthread\n");
6024+ }
6025+
6026+ free_debugfs();
6027+ ring_buffer_free(ring_buffer); /* free up the ring buffer */
6028+
6029+}
6030+
6031+module_init(detector_init);
6032+module_exit(detector_exit);
6033diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
6034index 1507723..724f478 100644
6035--- a/drivers/mmc/host/mmci.c
6036+++ b/drivers/mmc/host/mmci.c
6037@@ -930,15 +930,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
6038 struct sg_mapping_iter *sg_miter = &host->sg_miter;
6039 struct variant_data *variant = host->variant;
6040 void __iomem *base = host->base;
6041- unsigned long flags;
6042 u32 status;
6043
6044 status = readl(base + MMCISTATUS);
6045
6046 dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
6047
6048- local_irq_save(flags);
6049-
6050 do {
6051 unsigned int remain, len;
6052 char *buffer;
6053@@ -978,8 +975,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
6054
6055 sg_miter_stop(sg_miter);
6056
6057- local_irq_restore(flags);
6058-
6059 /*
6060 * If we have less than the fifo 'half-full' threshold to transfer,
6061 * trigger a PIO interrupt as soon as any data is available.
6062diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
6063index 6a70184..0052e52 100644
6064--- a/drivers/net/Kconfig
6065+++ b/drivers/net/Kconfig
6066@@ -164,6 +164,7 @@ config VXLAN
6067
6068 config NETCONSOLE
6069 tristate "Network console logging support"
6070+ depends on !PREEMPT_RT_FULL
6071 ---help---
6072 If you want to log kernel messages over the network, enable this.
6073 See <file:Documentation/networking/netconsole.txt> for details.
6074diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
6075index ed0feb3..0da3917 100644
6076--- a/drivers/net/ethernet/3com/3c59x.c
6077+++ b/drivers/net/ethernet/3com/3c59x.c
6078@@ -843,9 +843,9 @@ static void poll_vortex(struct net_device *dev)
6079 {
6080 struct vortex_private *vp = netdev_priv(dev);
6081 unsigned long flags;
6082- local_irq_save(flags);
6083+ local_irq_save_nort(flags);
6084 (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
6085- local_irq_restore(flags);
6086+ local_irq_restore_nort(flags);
6087 }
6088 #endif
6089
6090@@ -1919,12 +1919,12 @@ static void vortex_tx_timeout(struct net_device *dev)
6091 * Block interrupts because vortex_interrupt does a bare spin_lock()
6092 */
6093 unsigned long flags;
6094- local_irq_save(flags);
6095+ local_irq_save_nort(flags);
6096 if (vp->full_bus_master_tx)
6097 boomerang_interrupt(dev->irq, dev);
6098 else
6099 vortex_interrupt(dev->irq, dev);
6100- local_irq_restore(flags);
6101+ local_irq_restore_nort(flags);
6102 }
6103 }
6104
6105diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
6106index bfcb8bc..dbe44ba 100644
6107--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
6108+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
6109@@ -2171,11 +2171,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
6110 }
6111
6112 tpd_req = atl1c_cal_tpd_req(skb);
6113- if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
6114- if (netif_msg_pktdata(adapter))
6115- dev_info(&adapter->pdev->dev, "tx locked\n");
6116- return NETDEV_TX_LOCKED;
6117- }
6118+ spin_lock_irqsave(&adapter->tx_lock, flags);
6119
6120 if (atl1c_tpd_avail(adapter, type) < tpd_req) {
6121 /* no enough descriptor, just stop queue */
6122diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
6123index ca33b28..7569f68 100644
6124--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
6125+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
6126@@ -1803,8 +1803,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb,
6127 return NETDEV_TX_OK;
6128 }
6129 tpd_req = atl1e_cal_tdp_req(skb);
6130- if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
6131- return NETDEV_TX_LOCKED;
6132+ spin_lock_irqsave(&adapter->tx_lock, flags);
6133
6134 if (atl1e_tpd_avail(adapter) < tpd_req) {
6135 /* no enough descriptor, just stop queue */
6136diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
6137index d84872e..1420ea8 100644
6138--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
6139+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
6140@@ -1666,8 +1666,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter,
6141 struct cmdQ *q = &sge->cmdQ[qid];
6142 unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
6143
6144- if (!spin_trylock(&q->lock))
6145- return NETDEV_TX_LOCKED;
6146+ spin_lock(&q->lock);
6147
6148 reclaim_completed_tx(sge, q);
6149
6150diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
6151index 1e9443d..d25961b 100644
6152--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
6153+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
6154@@ -1943,6 +1943,7 @@ static void tulip_remove_one(struct pci_dev *pdev)
6155 pci_iounmap(pdev, tp->base_addr);
6156 free_netdev (dev);
6157 pci_release_regions (pdev);
6158+ pci_disable_device (pdev);
6159 pci_set_drvdata (pdev, NULL);
6160
6161 /* pci_power_off (pdev, -1); */
6162diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
6163index bffb2ed..5c53535 100644
6164--- a/drivers/net/ethernet/freescale/gianfar.c
6165+++ b/drivers/net/ethernet/freescale/gianfar.c
6166@@ -1663,7 +1663,7 @@ void stop_gfar(struct net_device *dev)
6167
6168
6169 /* Lock it down */
6170- local_irq_save(flags);
6171+ local_irq_save_nort(flags);
6172 lock_tx_qs(priv);
6173 lock_rx_qs(priv);
6174
6175@@ -1671,7 +1671,7 @@ void stop_gfar(struct net_device *dev)
6176
6177 unlock_rx_qs(priv);
6178 unlock_tx_qs(priv);
6179- local_irq_restore(flags);
6180+ local_irq_restore_nort(flags);
6181
6182 /* Free the IRQs */
6183 if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
6184@@ -2951,7 +2951,7 @@ static void adjust_link(struct net_device *dev)
6185 struct phy_device *phydev = priv->phydev;
6186 int new_state = 0;
6187
6188- local_irq_save(flags);
6189+ local_irq_save_nort(flags);
6190 lock_tx_qs(priv);
6191
6192 if (phydev->link) {
6193@@ -3020,7 +3020,7 @@ static void adjust_link(struct net_device *dev)
6194 if (new_state && netif_msg_link(priv))
6195 phy_print_status(phydev);
6196 unlock_tx_qs(priv);
6197- local_irq_restore(flags);
6198+ local_irq_restore_nort(flags);
6199 }
6200
6201 /* Update the hash table based on the current list of multicast
6202diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
6203index 7c94c08..8757a2c 100644
6204--- a/drivers/net/ethernet/neterion/s2io.c
6205+++ b/drivers/net/ethernet/neterion/s2io.c
6206@@ -4088,12 +4088,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
6207 [skb->priority & (MAX_TX_FIFOS - 1)];
6208 fifo = &mac_control->fifos[queue];
6209
6210- if (do_spin_lock)
6211- spin_lock_irqsave(&fifo->tx_lock, flags);
6212- else {
6213- if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
6214- return NETDEV_TX_LOCKED;
6215- }
6216+ spin_lock_irqsave(&fifo->tx_lock, flags);
6217
6218 if (sp->config.multiq) {
6219 if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
6220diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
6221index 73ce7dd..b3ba6fe 100644
6222--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
6223+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
6224@@ -2114,10 +2114,8 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
6225 struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
6226 unsigned long flags;
6227
6228- if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
6229- /* Collision - tell upper layer to requeue */
6230- return NETDEV_TX_LOCKED;
6231- }
6232+ spin_lock_irqsave(&tx_ring->tx_lock, flags);
6233+
6234 if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
6235 netif_stop_queue(netdev);
6236 spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
6237diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
6238index 5dc1616..3bed27d 100644
6239--- a/drivers/net/ethernet/realtek/8139too.c
6240+++ b/drivers/net/ethernet/realtek/8139too.c
6241@@ -2216,7 +2216,7 @@ static void rtl8139_poll_controller(struct net_device *dev)
6242 struct rtl8139_private *tp = netdev_priv(dev);
6243 const int irq = tp->pci_dev->irq;
6244
6245- disable_irq(irq);
6246+ disable_irq_nosync(irq);
6247 rtl8139_interrupt(irq, dev);
6248 enable_irq(irq);
6249 }
6250diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
6251index 1e4d743..9dfd4f5 100644
6252--- a/drivers/net/ethernet/tehuti/tehuti.c
6253+++ b/drivers/net/ethernet/tehuti/tehuti.c
6254@@ -1630,13 +1630,8 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb,
6255 unsigned long flags;
6256
6257 ENTER;
6258- local_irq_save(flags);
6259- if (!spin_trylock(&priv->tx_lock)) {
6260- local_irq_restore(flags);
6261- DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
6262- BDX_DRV_NAME, ndev->name);
6263- return NETDEV_TX_LOCKED;
6264- }
6265+
6266+ spin_lock_irqsave(&priv->tx_lock, flags);
6267
6268 /* build tx descriptor */
6269 BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
6270diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
6271index 4426151..de71b1e 100644
6272--- a/drivers/net/ethernet/ti/Kconfig
6273+++ b/drivers/net/ethernet/ti/Kconfig
6274@@ -88,8 +88,8 @@ config TLAN
6275 Please email feedback to <torben.mathiasen@compaq.com>.
6276
6277 config CPMAC
6278- tristate "TI AR7 CPMAC Ethernet support (EXPERIMENTAL)"
6279- depends on EXPERIMENTAL && AR7
6280+ tristate "TI AR7 CPMAC Ethernet support"
6281+ depends on AR7
6282 select PHYLIB
6283 ---help---
6284 TI AR7 CPMAC Ethernet support
6285diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
6286index d9625f6..70d1920 100644
6287--- a/drivers/net/ethernet/ti/cpmac.c
6288+++ b/drivers/net/ethernet/ti/cpmac.c
6289@@ -904,10 +904,9 @@ static int cpmac_set_ringparam(struct net_device *dev,
6290 static void cpmac_get_drvinfo(struct net_device *dev,
6291 struct ethtool_drvinfo *info)
6292 {
6293- strcpy(info->driver, "cpmac");
6294- strcpy(info->version, CPMAC_VERSION);
6295- info->fw_version[0] = '\0';
6296- sprintf(info->bus_info, "%s", "cpmac");
6297+ strlcpy(info->driver, "cpmac", sizeof(info->driver));
6298+ strlcpy(info->version, CPMAC_VERSION, sizeof(info->version));
6299+ snprintf(info->bus_info, sizeof(info->bus_info), "%s", "cpmac");
6300 info->regdump_len = 0;
6301 }
6302
6303diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
6304index 3b1be52..1cfde0c 100644
6305--- a/drivers/net/ethernet/ti/cpsw.c
6306+++ b/drivers/net/ethernet/ti/cpsw.c
6307@@ -32,6 +32,7 @@
6308 #include <linux/of.h>
6309 #include <linux/of_net.h>
6310 #include <linux/of_device.h>
6311+#include <linux/if_vlan.h>
6312
6313 #include <linux/platform_data/cpsw.h>
6314
6315@@ -118,6 +119,20 @@ do { \
6316 #define TX_PRIORITY_MAPPING 0x33221100
6317 #define CPDMA_TX_PRIORITY_MAP 0x76543210
6318
6319+#define CPSW_VLAN_AWARE BIT(1)
6320+#define CPSW_ALE_VLAN_AWARE 1
6321+
6322+#define CPSW_FIFO_NORMAL_MODE (0 << 15)
6323+#define CPSW_FIFO_DUAL_MAC_MODE (1 << 15)
6324+#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 15)
6325+
6326+#define CPSW_INTPACEEN (0x3f << 16)
6327+#define CPSW_INTPRESCALE_MASK (0x7FF << 0)
6328+#define CPSW_CMINTMAX_CNT 63
6329+#define CPSW_CMINTMIN_CNT 2
6330+#define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT)
6331+#define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1)
6332+
6333 #define cpsw_enable_irq(priv) \
6334 do { \
6335 u32 i; \
6336@@ -131,6 +146,10 @@ do { \
6337 disable_irq_nosync(priv->irqs_table[i]); \
6338 } while (0);
6339
6340+#define cpsw_slave_index(priv) \
6341+ ((priv->data.dual_emac) ? priv->emac_port : \
6342+ priv->data.active_slave)
6343+
6344 static int debug_level;
6345 module_param(debug_level, int, 0);
6346 MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
6347@@ -152,6 +171,15 @@ struct cpsw_wr_regs {
6348 u32 rx_en;
6349 u32 tx_en;
6350 u32 misc_en;
6351+ u32 mem_allign1[8];
6352+ u32 rx_thresh_stat;
6353+ u32 rx_stat;
6354+ u32 tx_stat;
6355+ u32 misc_stat;
6356+ u32 mem_allign2[8];
6357+ u32 rx_imax;
6358+ u32 tx_imax;
6359+
6360 };
6361
6362 struct cpsw_ss_regs {
6363@@ -250,7 +278,7 @@ struct cpsw_ss_regs {
6364 struct cpsw_host_regs {
6365 u32 max_blks;
6366 u32 blk_cnt;
6367- u32 flow_thresh;
6368+ u32 tx_in_ctl;
6369 u32 port_vlan;
6370 u32 tx_pri_map;
6371 u32 cpdma_tx_pri_map;
6372@@ -277,6 +305,9 @@ struct cpsw_slave {
6373 u32 mac_control;
6374 struct cpsw_slave_data *data;
6375 struct phy_device *phy;
6376+ struct net_device *ndev;
6377+ u32 port_vlan;
6378+ u32 open_stat;
6379 };
6380
6381 static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
6382@@ -303,6 +334,8 @@ struct cpsw_priv {
6383 struct cpsw_host_regs __iomem *host_port_regs;
6384 u32 msg_enable;
6385 u32 version;
6386+ u32 coal_intvl;
6387+ u32 bus_freq_mhz;
6388 struct net_device_stats stats;
6389 int rx_packet_max;
6390 int host_port;
6391@@ -315,17 +348,69 @@ struct cpsw_priv {
6392 /* snapshot of IRQ numbers */
6393 u32 irqs_table[4];
6394 u32 num_irqs;
6395- struct cpts cpts;
6396+ bool irq_enabled;
6397+ struct cpts *cpts;
6398+ u32 emac_port;
6399 };
6400
6401 #define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi)
6402-#define for_each_slave(priv, func, arg...) \
6403- do { \
6404- int idx; \
6405- for (idx = 0; idx < (priv)->data.slaves; idx++) \
6406- (func)((priv)->slaves + idx, ##arg); \
6407+#define for_each_slave(priv, func, arg...) \
6408+ do { \
6409+ struct cpsw_slave *slave; \
6410+ int n; \
6411+ if (priv->data.dual_emac) \
6412+ (func)((priv)->slaves + priv->emac_port, ##arg);\
6413+ else \
6414+ for (n = (priv)->data.slaves, \
6415+ slave = (priv)->slaves; \
6416+ n; n--) \
6417+ (func)(slave++, ##arg); \
6418+ } while (0)
6419+#define cpsw_get_slave_ndev(priv, __slave_no__) \
6420+ (priv->slaves[__slave_no__].ndev)
6421+#define cpsw_get_slave_priv(priv, __slave_no__) \
6422+ ((priv->slaves[__slave_no__].ndev) ? \
6423+ netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \
6424+
6425+#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \
6426+ do { \
6427+ if (!priv->data.dual_emac) \
6428+ break; \
6429+ if (CPDMA_RX_SOURCE_PORT(status) == 1) { \
6430+ ndev = cpsw_get_slave_ndev(priv, 0); \
6431+ priv = netdev_priv(ndev); \
6432+ skb->dev = ndev; \
6433+ } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \
6434+ ndev = cpsw_get_slave_ndev(priv, 1); \
6435+ priv = netdev_priv(ndev); \
6436+ skb->dev = ndev; \
6437+ } \
6438+ } while (0)
6439+#define cpsw_add_mcast(priv, addr) \
6440+ do { \
6441+ if (priv->data.dual_emac) { \
6442+ struct cpsw_slave *slave = priv->slaves + \
6443+ priv->emac_port; \
6444+ int slave_port = cpsw_get_slave_port(priv, \
6445+ slave->slave_num); \
6446+ cpsw_ale_add_mcast(priv->ale, addr, \
6447+ 1 << slave_port | 1 << priv->host_port, \
6448+ ALE_VLAN, slave->port_vlan, 0); \
6449+ } else { \
6450+ cpsw_ale_add_mcast(priv->ale, addr, \
6451+ ALE_ALL_PORTS << priv->host_port, \
6452+ 0, 0, 0); \
6453+ } \
6454 } while (0)
6455
6456+static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
6457+{
6458+ if (priv->host_port == 0)
6459+ return slave_num + 1;
6460+ else
6461+ return slave_num;
6462+}
6463+
6464 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
6465 {
6466 struct cpsw_priv *priv = netdev_priv(ndev);
6467@@ -344,8 +429,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
6468
6469 /* program multicast address list into ALE register */
6470 netdev_for_each_mc_addr(ha, ndev) {
6471- cpsw_ale_add_mcast(priv->ale, (u8 *)ha->addr,
6472- ALE_ALL_PORTS << priv->host_port, 0, 0);
6473+ cpsw_add_mcast(priv, (u8 *)ha->addr);
6474 }
6475 }
6476 }
6477@@ -374,9 +458,12 @@ void cpsw_tx_handler(void *token, int len, int status)
6478 struct net_device *ndev = skb->dev;
6479 struct cpsw_priv *priv = netdev_priv(ndev);
6480
6481+ /* Check whether the queue is stopped due to stalled tx dma, if the
6482+ * queue is stopped then start the queue as we have free desc for tx
6483+ */
6484 if (unlikely(netif_queue_stopped(ndev)))
6485 netif_wake_queue(ndev);
6486- cpts_tx_timestamp(&priv->cpts, skb);
6487+ cpts_tx_timestamp(priv->cpts, skb);
6488 priv->stats.tx_packets++;
6489 priv->stats.tx_bytes += len;
6490 dev_kfree_skb_any(skb);
6491@@ -385,81 +472,105 @@ void cpsw_tx_handler(void *token, int len, int status)
6492 void cpsw_rx_handler(void *token, int len, int status)
6493 {
6494 struct sk_buff *skb = token;
6495+ struct sk_buff *new_skb;
6496 struct net_device *ndev = skb->dev;
6497 struct cpsw_priv *priv = netdev_priv(ndev);
6498 int ret = 0;
6499
6500- /* free and bail if we are shutting down */
6501- if (unlikely(!netif_running(ndev)) ||
6502- unlikely(!netif_carrier_ok(ndev))) {
6503+ cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
6504+
6505+ if (unlikely(status < 0)) {
6506+ /* the interface is going down, skbs are purged */
6507 dev_kfree_skb_any(skb);
6508 return;
6509 }
6510- if (likely(status >= 0)) {
6511+
6512+ new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max);
6513+ if (new_skb) {
6514 skb_put(skb, len);
6515- cpts_rx_timestamp(&priv->cpts, skb);
6516+ cpts_rx_timestamp(priv->cpts, skb);
6517 skb->protocol = eth_type_trans(skb, ndev);
6518- netif_receive_skb(skb);
6519+ netif_rx(skb);
6520 priv->stats.rx_bytes += len;
6521 priv->stats.rx_packets++;
6522- skb = NULL;
6523- }
6524-
6525- if (unlikely(!netif_running(ndev))) {
6526- if (skb)
6527- dev_kfree_skb_any(skb);
6528- return;
6529+ } else {
6530+ priv->stats.rx_dropped++;
6531+ new_skb = skb;
6532 }
6533
6534- if (likely(!skb)) {
6535- skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max);
6536- if (WARN_ON(!skb))
6537- return;
6538-
6539- ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
6540- skb_tailroom(skb), GFP_KERNEL);
6541- }
6542- WARN_ON(ret < 0);
6543+ ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data,
6544+ skb_tailroom(new_skb), 0);
6545+ if (WARN_ON(ret < 0))
6546+ dev_kfree_skb_any(new_skb);
6547 }
6548
6549 static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
6550 {
6551 struct cpsw_priv *priv = dev_id;
6552+ unsigned long flags;
6553+ u32 rx, tx, rx_thresh;
6554+
6555+ spin_lock_irqsave(&priv->lock, flags);
6556+ rx_thresh = __raw_readl(&priv->wr_regs->rx_thresh_stat);
6557+ rx = __raw_readl(&priv->wr_regs->rx_stat);
6558+ tx = __raw_readl(&priv->wr_regs->tx_stat);
6559+ if (!rx_thresh && !rx && !tx) {
6560+ spin_unlock_irqrestore(&priv->lock, flags);
6561+ return IRQ_NONE;
6562+ }
6563
6564- if (likely(netif_running(priv->ndev))) {
6565- cpsw_intr_disable(priv);
6566+ cpsw_intr_disable(priv);
6567+ if (priv->irq_enabled == true) {
6568 cpsw_disable_irq(priv);
6569+ priv->irq_enabled = false;
6570+ }
6571+ spin_unlock_irqrestore(&priv->lock, flags);
6572+
6573+ if (netif_running(priv->ndev)) {
6574 napi_schedule(&priv->napi);
6575+ return IRQ_HANDLED;
6576 }
6577- return IRQ_HANDLED;
6578-}
6579
6580-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
6581-{
6582- if (priv->host_port == 0)
6583- return slave_num + 1;
6584- else
6585- return slave_num;
6586+ priv = cpsw_get_slave_priv(priv, 1);
6587+ if (!priv)
6588+ return IRQ_NONE;
6589+
6590+ if (netif_running(priv->ndev)) {
6591+ napi_schedule(&priv->napi);
6592+ return IRQ_HANDLED;
6593+ }
6594+ return IRQ_NONE;
6595 }
6596
6597 static int cpsw_poll(struct napi_struct *napi, int budget)
6598 {
6599 struct cpsw_priv *priv = napi_to_priv(napi);
6600 int num_tx, num_rx;
6601+ unsigned long flags;
6602
6603+ spin_lock_irqsave(&priv->lock, flags);
6604 num_tx = cpdma_chan_process(priv->txch, 128);
6605- num_rx = cpdma_chan_process(priv->rxch, budget);
6606-
6607- if (num_rx || num_tx)
6608- cpsw_dbg(priv, intr, "poll %d rx, %d tx pkts\n",
6609- num_rx, num_tx);
6610+ if (num_tx)
6611+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
6612
6613+ num_rx = cpdma_chan_process(priv->rxch, budget);
6614 if (num_rx < budget) {
6615+ struct cpsw_priv *prim_cpsw;
6616+
6617 napi_complete(napi);
6618 cpsw_intr_enable(priv);
6619- cpdma_ctlr_eoi(priv->dma);
6620- cpsw_enable_irq(priv);
6621+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
6622+ prim_cpsw = cpsw_get_slave_priv(priv, 0);
6623+ if (prim_cpsw->irq_enabled == false) {
6624+ cpsw_enable_irq(priv);
6625+ prim_cpsw->irq_enabled = true;
6626+ }
6627 }
6628+ spin_unlock_irqrestore(&priv->lock, flags);
6629+
6630+ if (num_rx || num_tx)
6631+ cpsw_dbg(priv, intr, "poll %d rx, %d tx pkts\n",
6632+ num_rx, num_tx);
6633
6634 return num_rx;
6635 }
6636@@ -548,6 +659,77 @@ static void cpsw_adjust_link(struct net_device *ndev)
6637 }
6638 }
6639
6640+static int cpsw_get_coalesce(struct net_device *ndev,
6641+ struct ethtool_coalesce *coal)
6642+{
6643+ struct cpsw_priv *priv = netdev_priv(ndev);
6644+
6645+ coal->rx_coalesce_usecs = priv->coal_intvl;
6646+ return 0;
6647+}
6648+
6649+static int cpsw_set_coalesce(struct net_device *ndev,
6650+ struct ethtool_coalesce *coal)
6651+{
6652+ struct cpsw_priv *priv = netdev_priv(ndev);
6653+ u32 int_ctrl;
6654+ u32 num_interrupts = 0;
6655+ u32 prescale = 0;
6656+ u32 addnl_dvdr = 1;
6657+ u32 coal_intvl = 0;
6658+
6659+ if (!coal->rx_coalesce_usecs)
6660+ return -EINVAL;
6661+
6662+ coal_intvl = coal->rx_coalesce_usecs;
6663+
6664+ int_ctrl = readl(&priv->wr_regs->int_control);
6665+ prescale = priv->bus_freq_mhz * 4;
6666+
6667+ if (coal_intvl < CPSW_CMINTMIN_INTVL)
6668+ coal_intvl = CPSW_CMINTMIN_INTVL;
6669+
6670+ if (coal_intvl > CPSW_CMINTMAX_INTVL) {
6671+ /* Interrupt pacer works with 4us Pulse, we can
6672+ * throttle further by dilating the 4us pulse.
6673+ */
6674+ addnl_dvdr = CPSW_INTPRESCALE_MASK / prescale;
6675+
6676+ if (addnl_dvdr > 1) {
6677+ prescale *= addnl_dvdr;
6678+ if (coal_intvl > (CPSW_CMINTMAX_INTVL * addnl_dvdr))
6679+ coal_intvl = (CPSW_CMINTMAX_INTVL
6680+ * addnl_dvdr);
6681+ } else {
6682+ addnl_dvdr = 1;
6683+ coal_intvl = CPSW_CMINTMAX_INTVL;
6684+ }
6685+ }
6686+
6687+ num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
6688+ writel(num_interrupts, &priv->wr_regs->rx_imax);
6689+ writel(num_interrupts, &priv->wr_regs->tx_imax);
6690+
6691+ int_ctrl |= CPSW_INTPACEEN;
6692+ int_ctrl &= (~CPSW_INTPRESCALE_MASK);
6693+ int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
6694+ writel(int_ctrl, &priv->wr_regs->int_control);
6695+
6696+ cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
6697+ if (priv->data.dual_emac) {
6698+ int i;
6699+
6700+ for (i = 0; i < priv->data.slaves; i++) {
6701+ priv = netdev_priv(priv->slaves[i].ndev);
6702+ priv->coal_intvl = coal_intvl;
6703+ }
6704+ } else {
6705+ priv->coal_intvl = coal_intvl;
6706+ }
6707+
6708+ return 0;
6709+}
6710+
6711 static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val)
6712 {
6713 static char *leader = "........................................";
6714@@ -559,6 +741,54 @@ static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val)
6715 leader + strlen(name), val);
6716 }
6717
6718+static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
6719+{
6720+ u32 i;
6721+ u32 usage_count = 0;
6722+
6723+ if (!priv->data.dual_emac)
6724+ return 0;
6725+
6726+ for (i = 0; i < priv->data.slaves; i++)
6727+ if (priv->slaves[i].open_stat)
6728+ usage_count++;
6729+
6730+ return usage_count;
6731+}
6732+
6733+static inline int cpsw_tx_packet_submit(struct net_device *ndev,
6734+ struct cpsw_priv *priv, struct sk_buff *skb)
6735+{
6736+ if (!priv->data.dual_emac)
6737+ return cpdma_chan_submit(priv->txch, skb, skb->data,
6738+ skb->len, 0);
6739+
6740+ if (ndev == cpsw_get_slave_ndev(priv, 0))
6741+ return cpdma_chan_submit(priv->txch, skb, skb->data,
6742+ skb->len, 1);
6743+ else
6744+ return cpdma_chan_submit(priv->txch, skb, skb->data,
6745+ skb->len, 2);
6746+}
6747+
6748+static inline void cpsw_add_dual_emac_def_ale_entries(
6749+ struct cpsw_priv *priv, struct cpsw_slave *slave,
6750+ u32 slave_port)
6751+{
6752+ u32 port_mask = 1 << slave_port | 1 << priv->host_port;
6753+
6754+ if (priv->version == CPSW_VERSION_1)
6755+ slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
6756+ else
6757+ slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
6758+ cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
6759+ port_mask, port_mask, 0);
6760+ cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
6761+ port_mask, ALE_VLAN, slave->port_vlan, 0);
6762+ cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
6763+ priv->host_port, ALE_VLAN, slave->port_vlan);
6764+}
6765+
6766 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
6767 {
6768 char name[32];
6769@@ -588,8 +818,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
6770
6771 slave_port = cpsw_get_slave_port(priv, slave->slave_num);
6772
6773- cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
6774- 1 << slave_port, 0, ALE_MCAST_FWD_2);
6775+ if (priv->data.dual_emac)
6776+ cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
6777+ else
6778+ cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
6779+ 1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
6780
6781 slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
6782 &cpsw_adjust_link, 0, slave->data->phy_if);
6783@@ -604,14 +837,44 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
6784 }
6785 }
6786
6787+static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
6788+{
6789+ const int vlan = priv->data.default_vlan;
6790+ const int port = priv->host_port;
6791+ u32 reg;
6792+ int i;
6793+
6794+ reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
6795+ CPSW2_PORT_VLAN;
6796+
6797+ writel(vlan, &priv->host_port_regs->port_vlan);
6798+
6799+ for (i = 0; i < priv->data.slaves; i++)
6800+ slave_write(priv->slaves + i, vlan, reg);
6801+
6802+ cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port,
6803+ ALE_ALL_PORTS << port, ALE_ALL_PORTS << port,
6804+ (ALE_PORT_1 | ALE_PORT_2) << port);
6805+}
6806+
6807 static void cpsw_init_host_port(struct cpsw_priv *priv)
6808 {
6809+ u32 control_reg;
6810+ u32 fifo_mode;
6811+
6812 /* soft reset the controller and initialize ale */
6813 soft_reset("cpsw", &priv->regs->soft_reset);
6814 cpsw_ale_start(priv->ale);
6815
6816 /* switch to vlan unaware mode */
6817- cpsw_ale_control_set(priv->ale, 0, ALE_VLAN_AWARE, 0);
6818+ cpsw_ale_control_set(priv->ale, priv->host_port, ALE_VLAN_AWARE,
6819+ CPSW_ALE_VLAN_AWARE);
6820+ control_reg = readl(&priv->regs->control);
6821+ control_reg |= CPSW_VLAN_AWARE;
6822+ writel(control_reg, &priv->regs->control);
6823+ fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
6824+ CPSW_FIFO_NORMAL_MODE;
6825+ writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
6826
6827 /* setup host port priority mapping */
6828 __raw_writel(CPDMA_TX_PRIORITY_MAP,
6829@@ -621,18 +884,32 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
6830 cpsw_ale_control_set(priv->ale, priv->host_port,
6831 ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
6832
6833- cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port, 0);
6834- cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
6835- 1 << priv->host_port, 0, ALE_MCAST_FWD_2);
6836+ if (!priv->data.dual_emac) {
6837+ cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port,
6838+ 0, 0);
6839+ cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
6840+ 1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2);
6841+ }
6842+}
6843+
6844+static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
6845+{
6846+ if (!slave->phy)
6847+ return;
6848+ phy_stop(slave->phy);
6849+ phy_disconnect(slave->phy);
6850+ slave->phy = NULL;
6851 }
6852
6853 static int cpsw_ndo_open(struct net_device *ndev)
6854 {
6855 struct cpsw_priv *priv = netdev_priv(ndev);
6856+ struct cpsw_priv *prim_cpsw;
6857 int i, ret;
6858 u32 reg;
6859
6860- cpsw_intr_disable(priv);
6861+ if (!cpsw_common_res_usage_state(priv))
6862+ cpsw_intr_disable(priv);
6863 netif_carrier_off(ndev);
6864
6865 pm_runtime_get_sync(&priv->pdev->dev);
6866@@ -644,53 +921,81 @@ static int cpsw_ndo_open(struct net_device *ndev)
6867 CPSW_RTL_VERSION(reg));
6868
6869 /* initialize host and slave ports */
6870- cpsw_init_host_port(priv);
6871+ if (!cpsw_common_res_usage_state(priv))
6872+ cpsw_init_host_port(priv);
6873 for_each_slave(priv, cpsw_slave_open, priv);
6874
6875- /* setup tx dma to fixed prio and zero offset */
6876- cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
6877- cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
6878-
6879- /* disable priority elevation and enable statistics on all ports */
6880- __raw_writel(0, &priv->regs->ptype);
6881-
6882- /* enable statistics collection only on the host port */
6883- __raw_writel(0x7, &priv->regs->stat_port_en);
6884+ /* Add default VLAN */
6885+ if (!priv->data.dual_emac)
6886+ cpsw_add_default_vlan(priv);
6887+
6888+ if (!cpsw_common_res_usage_state(priv)) {
6889+ /* setup tx dma to fixed prio and zero offset */
6890+ cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
6891+ cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
6892+
6893+ /* disable priority elevation */
6894+ __raw_writel(0, &priv->regs->ptype);
6895+
6896+ /* enable statistics collection only on all ports */
6897+ __raw_writel(0x7, &priv->regs->stat_port_en);
6898+
6899+ if (WARN_ON(!priv->data.rx_descs))
6900+ priv->data.rx_descs = 128;
6901+
6902+ for (i = 0; i < priv->data.rx_descs; i++) {
6903+ struct sk_buff *skb;
6904+
6905+ ret = -ENOMEM;
6906+ skb = __netdev_alloc_skb_ip_align(priv->ndev,
6907+ priv->rx_packet_max, GFP_KERNEL);
6908+ if (!skb)
6909+ goto err_cleanup;
6910+ ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
6911+ skb_tailroom(skb), 0);
6912+ if (ret < 0) {
6913+ kfree_skb(skb);
6914+ goto err_cleanup;
6915+ }
6916+ }
6917+ /* continue even if we didn't manage to submit all
6918+ * receive descs
6919+ */
6920+ cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
6921+ }
6922
6923- if (WARN_ON(!priv->data.rx_descs))
6924- priv->data.rx_descs = 128;
6925+ /* Enable Interrupt pacing if configured */
6926+ if (priv->coal_intvl != 0) {
6927+ struct ethtool_coalesce coal;
6928
6929- for (i = 0; i < priv->data.rx_descs; i++) {
6930- struct sk_buff *skb;
6931+ coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
6932+ cpsw_set_coalesce(ndev, &coal);
6933+ }
6934
6935- ret = -ENOMEM;
6936- skb = netdev_alloc_skb_ip_align(priv->ndev,
6937- priv->rx_packet_max);
6938- if (!skb)
6939- break;
6940- ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
6941- skb_tailroom(skb), GFP_KERNEL);
6942- if (WARN_ON(ret < 0))
6943- break;
6944+ prim_cpsw = cpsw_get_slave_priv(priv, 0);
6945+ if (prim_cpsw->irq_enabled == false) {
6946+ if ((priv == prim_cpsw) || !netif_running(prim_cpsw->ndev)) {
6947+ prim_cpsw->irq_enabled = true;
6948+ cpsw_enable_irq(prim_cpsw);
6949+ }
6950 }
6951- /* continue even if we didn't manage to submit all receive descs */
6952- cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
6953
6954 cpdma_ctlr_start(priv->dma);
6955 cpsw_intr_enable(priv);
6956 napi_enable(&priv->napi);
6957- cpdma_ctlr_eoi(priv->dma);
6958+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
6959+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
6960
6961+ if (priv->data.dual_emac)
6962+ priv->slaves[priv->emac_port].open_stat = true;
6963 return 0;
6964-}
6965
6966-static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
6967-{
6968- if (!slave->phy)
6969- return;
6970- phy_stop(slave->phy);
6971- phy_disconnect(slave->phy);
6972- slave->phy = NULL;
6973+err_cleanup:
6974+ cpdma_ctlr_stop(priv->dma);
6975+ for_each_slave(priv, cpsw_slave_stop, priv);
6976+ pm_runtime_put_sync(&priv->pdev->dev);
6977+ netif_carrier_off(priv->ndev);
6978+ return ret;
6979 }
6980
6981 static int cpsw_ndo_stop(struct net_device *ndev)
6982@@ -701,12 +1006,17 @@ static int cpsw_ndo_stop(struct net_device *ndev)
6983 netif_stop_queue(priv->ndev);
6984 napi_disable(&priv->napi);
6985 netif_carrier_off(priv->ndev);
6986- cpsw_intr_disable(priv);
6987- cpdma_ctlr_int_ctrl(priv->dma, false);
6988- cpdma_ctlr_stop(priv->dma);
6989- cpsw_ale_stop(priv->ale);
6990+
6991+ if (cpsw_common_res_usage_state(priv) <= 1) {
6992+ cpsw_intr_disable(priv);
6993+ cpdma_ctlr_int_ctrl(priv->dma, false);
6994+ cpdma_ctlr_stop(priv->dma);
6995+ cpsw_ale_stop(priv->ale);
6996+ }
6997 for_each_slave(priv, cpsw_slave_stop, priv);
6998 pm_runtime_put_sync(&priv->pdev->dev);
6999+ if (priv->data.dual_emac)
7000+ priv->slaves[priv->emac_port].open_stat = false;
7001 return 0;
7002 }
7003
7004@@ -724,18 +1034,24 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
7005 return NETDEV_TX_OK;
7006 }
7007
7008- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && priv->cpts.tx_enable)
7009+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
7010+ priv->cpts->tx_enable)
7011 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
7012
7013 skb_tx_timestamp(skb);
7014
7015- ret = cpdma_chan_submit(priv->txch, skb, skb->data,
7016- skb->len, GFP_KERNEL);
7017+ ret = cpsw_tx_packet_submit(ndev, priv, skb);
7018 if (unlikely(ret != 0)) {
7019 cpsw_err(priv, tx_err, "desc submit failed\n");
7020 goto fail;
7021 }
7022
7023+ /* If there is no more tx desc left free then we need to
7024+ * tell the kernel to stop sending us tx frames.
7025+ */
7026+ if (unlikely(!cpdma_check_free_tx_desc(priv->txch)))
7027+ netif_stop_queue(ndev);
7028+
7029 return NETDEV_TX_OK;
7030 fail:
7031 priv->stats.tx_dropped++;
7032@@ -770,10 +1086,10 @@ static void cpsw_ndo_change_rx_flags(struct net_device *ndev, int flags)
7033
7034 static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
7035 {
7036- struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave];
7037+ struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave];
7038 u32 ts_en, seq_id;
7039
7040- if (!priv->cpts.tx_enable && !priv->cpts.rx_enable) {
7041+ if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) {
7042 slave_write(slave, 0, CPSW1_TS_CTL);
7043 return;
7044 }
7045@@ -781,10 +1097,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
7046 seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
7047 ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
7048
7049- if (priv->cpts.tx_enable)
7050+ if (priv->cpts->tx_enable)
7051 ts_en |= CPSW_V1_TS_TX_EN;
7052
7053- if (priv->cpts.rx_enable)
7054+ if (priv->cpts->rx_enable)
7055 ts_en |= CPSW_V1_TS_RX_EN;
7056
7057 slave_write(slave, ts_en, CPSW1_TS_CTL);
7058@@ -793,16 +1109,21 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
7059
7060 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
7061 {
7062- struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave];
7063+ struct cpsw_slave *slave;
7064 u32 ctrl, mtype;
7065
7066+ if (priv->data.dual_emac)
7067+ slave = &priv->slaves[priv->emac_port];
7068+ else
7069+ slave = &priv->slaves[priv->data.active_slave];
7070+
7071 ctrl = slave_read(slave, CPSW2_CONTROL);
7072 ctrl &= ~CTRL_ALL_TS_MASK;
7073
7074- if (priv->cpts.tx_enable)
7075+ if (priv->cpts->tx_enable)
7076 ctrl |= CTRL_TX_TS_BITS;
7077
7078- if (priv->cpts.rx_enable)
7079+ if (priv->cpts->rx_enable)
7080 ctrl |= CTRL_RX_TS_BITS;
7081
7082 mtype = (30 << TS_SEQ_ID_OFFSET_SHIFT) | EVENT_MSG_BITS;
7083@@ -815,7 +1136,7 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
7084 static int cpsw_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
7085 {
7086 struct cpsw_priv *priv = netdev_priv(dev);
7087- struct cpts *cpts = &priv->cpts;
7088+ struct cpts *cpts = priv->cpts;
7089 struct hwtstamp_config cfg;
7090
7091 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
7092@@ -879,14 +1200,26 @@ static int cpsw_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
7093
7094 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
7095 {
7096+ struct cpsw_priv *priv = netdev_priv(dev);
7097+ struct mii_ioctl_data *data = if_mii(req);
7098+ int slave_no = cpsw_slave_index(priv);
7099+
7100 if (!netif_running(dev))
7101 return -EINVAL;
7102
7103+ switch (cmd) {
7104 #ifdef CONFIG_TI_CPTS
7105- if (cmd == SIOCSHWTSTAMP)
7106+ case SIOCSHWTSTAMP:
7107 return cpsw_hwtstamp_ioctl(dev, req);
7108 #endif
7109- return -ENOTSUPP;
7110+ case SIOCGMIIPHY:
7111+ data->phy_id = priv->slaves[slave_no].phy->addr;
7112+ break;
7113+ default:
7114+ return -ENOTSUPP;
7115+ }
7116+
7117+ return 0;
7118 }
7119
7120 static void cpsw_ndo_tx_timeout(struct net_device *ndev)
7121@@ -901,7 +1234,9 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
7122 cpdma_chan_start(priv->txch);
7123 cpdma_ctlr_int_ctrl(priv->dma, true);
7124 cpsw_intr_enable(priv);
7125- cpdma_ctlr_eoi(priv->dma);
7126+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
7127+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
7128+
7129 }
7130
7131 static struct net_device_stats *cpsw_ndo_get_stats(struct net_device *ndev)
7132@@ -920,10 +1255,79 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
7133 cpsw_interrupt(ndev->irq, priv);
7134 cpdma_ctlr_int_ctrl(priv->dma, true);
7135 cpsw_intr_enable(priv);
7136- cpdma_ctlr_eoi(priv->dma);
7137+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
7138+ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
7139+
7140 }
7141 #endif
7142
7143+static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
7144+ unsigned short vid)
7145+{
7146+ int ret;
7147+
7148+ ret = cpsw_ale_add_vlan(priv->ale, vid,
7149+ ALE_ALL_PORTS << priv->host_port,
7150+ 0, ALE_ALL_PORTS << priv->host_port,
7151+ (ALE_PORT_1 | ALE_PORT_2) << priv->host_port);
7152+ if (ret != 0)
7153+ return ret;
7154+
7155+ ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
7156+ priv->host_port, ALE_VLAN, vid);
7157+ if (ret != 0)
7158+ goto clean_vid;
7159+
7160+ ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
7161+ ALE_ALL_PORTS << priv->host_port,
7162+ ALE_VLAN, vid, 0);
7163+ if (ret != 0)
7164+ goto clean_vlan_ucast;
7165+ return 0;
7166+
7167+clean_vlan_ucast:
7168+ cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
7169+ priv->host_port, ALE_VLAN, vid);
7170+clean_vid:
7171+ cpsw_ale_del_vlan(priv->ale, vid, 0);
7172+ return ret;
7173+}
7174+
7175+static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
7176+ unsigned short vid)
7177+{
7178+ struct cpsw_priv *priv = netdev_priv(ndev);
7179+
7180+ if (vid == priv->data.default_vlan)
7181+ return 0;
7182+
7183+ dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
7184+ return cpsw_add_vlan_ale_entry(priv, vid);
7185+}
7186+
7187+static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
7188+ unsigned short vid)
7189+{
7190+ struct cpsw_priv *priv = netdev_priv(ndev);
7191+ int ret;
7192+
7193+ if (vid == priv->data.default_vlan)
7194+ return 0;
7195+
7196+ dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
7197+ ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
7198+ if (ret != 0)
7199+ return ret;
7200+
7201+ ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
7202+ priv->host_port, ALE_VLAN, vid);
7203+ if (ret != 0)
7204+ return ret;
7205+
7206+ return cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
7207+ 0, ALE_VLAN, vid);
7208+}
7209+
7210 static const struct net_device_ops cpsw_netdev_ops = {
7211 .ndo_open = cpsw_ndo_open,
7212 .ndo_stop = cpsw_ndo_stop,
7213@@ -938,15 +1342,18 @@ static const struct net_device_ops cpsw_netdev_ops = {
7214 #ifdef CONFIG_NET_POLL_CONTROLLER
7215 .ndo_poll_controller = cpsw_ndo_poll_controller,
7216 #endif
7217+ .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid,
7218+ .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid,
7219 };
7220
7221 static void cpsw_get_drvinfo(struct net_device *ndev,
7222 struct ethtool_drvinfo *info)
7223 {
7224 struct cpsw_priv *priv = netdev_priv(ndev);
7225- strcpy(info->driver, "TI CPSW Driver v1.0");
7226- strcpy(info->version, "1.0");
7227- strcpy(info->bus_info, priv->pdev->name);
7228+
7229+ strlcpy(info->driver, "TI CPSW Driver v1.0", sizeof(info->driver));
7230+ strlcpy(info->version, "1.0", sizeof(info->version));
7231+ strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
7232 }
7233
7234 static u32 cpsw_get_msglevel(struct net_device *ndev)
7235@@ -974,7 +1381,7 @@ static int cpsw_get_ts_info(struct net_device *ndev,
7236 SOF_TIMESTAMPING_RX_SOFTWARE |
7237 SOF_TIMESTAMPING_SOFTWARE |
7238 SOF_TIMESTAMPING_RAW_HARDWARE;
7239- info->phc_index = priv->cpts.phc_index;
7240+ info->phc_index = priv->cpts->phc_index;
7241 info->tx_types =
7242 (1 << HWTSTAMP_TX_OFF) |
7243 (1 << HWTSTAMP_TX_ON);
7244@@ -993,12 +1400,39 @@ static int cpsw_get_ts_info(struct net_device *ndev,
7245 return 0;
7246 }
7247
7248+static int cpsw_get_settings(struct net_device *ndev,
7249+ struct ethtool_cmd *ecmd)
7250+{
7251+ struct cpsw_priv *priv = netdev_priv(ndev);
7252+ int slave_no = cpsw_slave_index(priv);
7253+
7254+ if (priv->slaves[slave_no].phy)
7255+ return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd);
7256+ else
7257+ return -EOPNOTSUPP;
7258+}
7259+
7260+static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
7261+{
7262+ struct cpsw_priv *priv = netdev_priv(ndev);
7263+ int slave_no = cpsw_slave_index(priv);
7264+
7265+ if (priv->slaves[slave_no].phy)
7266+ return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd);
7267+ else
7268+ return -EOPNOTSUPP;
7269+}
7270+
7271 static const struct ethtool_ops cpsw_ethtool_ops = {
7272 .get_drvinfo = cpsw_get_drvinfo,
7273 .get_msglevel = cpsw_get_msglevel,
7274 .set_msglevel = cpsw_set_msglevel,
7275 .get_link = ethtool_op_get_link,
7276 .get_ts_info = cpsw_get_ts_info,
7277+ .get_settings = cpsw_get_settings,
7278+ .set_settings = cpsw_set_settings,
7279+ .get_coalesce = cpsw_get_coalesce,
7280+ .set_coalesce = cpsw_set_coalesce,
7281 };
7282
7283 static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
7284@@ -1011,6 +1445,7 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
7285 slave->data = data;
7286 slave->regs = regs + slave_reg_ofs;
7287 slave->sliver = regs + sliver_reg_ofs;
7288+ slave->port_vlan = data->dual_emac_res_vlan;
7289 }
7290
7291 static int cpsw_probe_dt(struct cpsw_platform_data *data,
7292@@ -1030,12 +1465,16 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
7293 }
7294 data->slaves = prop;
7295
7296- if (of_property_read_u32(node, "cpts_active_slave", &prop)) {
7297- pr_err("Missing cpts_active_slave property in the DT.\n");
7298- ret = -EINVAL;
7299- goto error_ret;
7300+ if (of_property_read_u32(node, "active_slave", &prop)) {
7301+ pr_err("Missing active_slave property in the DT.\n");
7302+ if (of_property_read_u32(node, "cpts_active_slave", &prop)) {
7303+ ret = -EINVAL;
7304+ goto error_ret;
7305+ } else {
7306+ pr_err("Using old cpts_active_slave as fallback.\n");
7307+ }
7308 }
7309- data->cpts_active_slave = prop;
7310+ data->active_slave = prop;
7311
7312 if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
7313 pr_err("Missing cpts_clock_mult property in the DT.\n");
7314@@ -1051,12 +1490,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
7315 }
7316 data->cpts_clock_shift = prop;
7317
7318- data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) *
7319- data->slaves, GFP_KERNEL);
7320- if (!data->slave_data) {
7321- pr_err("Could not allocate slave memory.\n");
7322+ data->slave_data = kcalloc(data->slaves, sizeof(struct cpsw_slave_data),
7323+ GFP_KERNEL);
7324+ if (!data->slave_data)
7325 return -EINVAL;
7326- }
7327
7328 if (of_property_read_u32(node, "cpdma_channels", &prop)) {
7329 pr_err("Missing cpdma_channels property in the DT.\n");
7330@@ -1093,6 +1530,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
7331 }
7332 data->mac_control = prop;
7333
7334+ if (!of_property_read_u32(node, "dual_emac", &prop))
7335+ data->dual_emac = prop;
7336+
7337 /*
7338 * Populate all the child nodes here...
7339 */
7340@@ -1126,6 +1566,18 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
7341 if (mac_addr)
7342 memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
7343
7344+ if (data->dual_emac) {
7345+ if (of_property_read_u32(slave_node, "dual_emac_res_vlan",
7346+ &prop)) {
7347+ pr_err("Missing dual_emac_res_vlan in DT.\n");
7348+ slave_data->dual_emac_res_vlan = i+1;
7349+ pr_err("Using %d as Reserved VLAN for %d slave\n",
7350+ slave_data->dual_emac_res_vlan, i);
7351+ } else {
7352+ slave_data->dual_emac_res_vlan = prop;
7353+ }
7354+ }
7355+
7356 i++;
7357 }
7358
7359@@ -1136,9 +1588,84 @@ error_ret:
7360 return ret;
7361 }
7362
7363+static int cpsw_probe_dual_emac(struct platform_device *pdev,
7364+ struct cpsw_priv *priv)
7365+{
7366+ struct cpsw_platform_data *data = &priv->data;
7367+ struct net_device *ndev;
7368+ struct cpsw_priv *priv_sl2;
7369+ int ret = 0, i;
7370+
7371+ ndev = alloc_etherdev(sizeof(struct cpsw_priv));
7372+ if (!ndev) {
7373+ pr_err("cpsw: error allocating net_device\n");
7374+ return -ENOMEM;
7375+ }
7376+
7377+ priv_sl2 = netdev_priv(ndev);
7378+ spin_lock_init(&priv_sl2->lock);
7379+ priv_sl2->data = *data;
7380+ priv_sl2->pdev = pdev;
7381+ priv_sl2->ndev = ndev;
7382+ priv_sl2->dev = &ndev->dev;
7383+ priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
7384+ priv_sl2->rx_packet_max = max(rx_packet_max, 128);
7385+
7386+ if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
7387+ memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
7388+ ETH_ALEN);
7389+ pr_info("cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
7390+ } else {
7391+ random_ether_addr(priv_sl2->mac_addr);
7392+ pr_info("cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
7393+ }
7394+ memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
7395+
7396+ priv_sl2->slaves = priv->slaves;
7397+ priv_sl2->clk = priv->clk;
7398+
7399+ priv_sl2->coal_intvl = 0;
7400+ priv_sl2->bus_freq_mhz = priv->bus_freq_mhz;
7401+
7402+ priv_sl2->cpsw_res = priv->cpsw_res;
7403+ priv_sl2->regs = priv->regs;
7404+ priv_sl2->host_port = priv->host_port;
7405+ priv_sl2->host_port_regs = priv->host_port_regs;
7406+ priv_sl2->wr_regs = priv->wr_regs;
7407+ priv_sl2->dma = priv->dma;
7408+ priv_sl2->txch = priv->txch;
7409+ priv_sl2->rxch = priv->rxch;
7410+ priv_sl2->ale = priv->ale;
7411+ priv_sl2->emac_port = 1;
7412+ priv->slaves[1].ndev = ndev;
7413+ priv_sl2->cpts = priv->cpts;
7414+ priv_sl2->version = priv->version;
7415+
7416+ for (i = 0; i < priv->num_irqs; i++) {
7417+ priv_sl2->irqs_table[i] = priv->irqs_table[i];
7418+ priv_sl2->num_irqs = priv->num_irqs;
7419+ }
7420+ ndev->features |= NETIF_F_HW_VLAN_FILTER;
7421+
7422+ ndev->netdev_ops = &cpsw_netdev_ops;
7423+ SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
7424+ netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
7425+
7426+ /* register the network device */
7427+ SET_NETDEV_DEV(ndev, &pdev->dev);
7428+ ret = register_netdev(ndev);
7429+ if (ret) {
7430+ pr_err("cpsw: error registering net device\n");
7431+ free_netdev(ndev);
7432+ ret = -ENODEV;
7433+ }
7434+
7435+ return ret;
7436+}
7437+
7438 static int cpsw_probe(struct platform_device *pdev)
7439 {
7440- struct cpsw_platform_data *data = pdev->dev.platform_data;
7441+ struct cpsw_platform_data *data;
7442 struct net_device *ndev;
7443 struct cpsw_priv *priv;
7444 struct cpdma_params dma_params;
7445@@ -1162,6 +1689,11 @@ static int cpsw_probe(struct platform_device *pdev)
7446 priv->dev = &ndev->dev;
7447 priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
7448 priv->rx_packet_max = max(rx_packet_max, 128);
7449+ priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
7450+ if (!priv->cpts) {
7451+ pr_err("error allocating cpts\n");
7452+ goto clean_ndev_ret;
7453+ }
7454
7455 /*
7456 * This may be required here for child devices.
7457@@ -1194,12 +1726,17 @@ static int cpsw_probe(struct platform_device *pdev)
7458 for (i = 0; i < data->slaves; i++)
7459 priv->slaves[i].slave_num = i;
7460
7461+ priv->slaves[0].ndev = ndev;
7462+ priv->emac_port = 0;
7463+
7464 priv->clk = clk_get(&pdev->dev, "fck");
7465 if (IS_ERR(priv->clk)) {
7466 dev_err(&pdev->dev, "fck is not found\n");
7467 ret = -ENODEV;
7468 goto clean_slave_ret;
7469 }
7470+ priv->coal_intvl = 0;
7471+ priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000;
7472
7473 priv->cpsw_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
7474 if (!priv->cpsw_res) {
7475@@ -1248,7 +1785,7 @@ static int cpsw_probe(struct platform_device *pdev)
7476 switch (priv->version) {
7477 case CPSW_VERSION_1:
7478 priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
7479- priv->cpts.reg = ss_regs + CPSW1_CPTS_OFFSET;
7480+ priv->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET;
7481 dma_params.dmaregs = ss_regs + CPSW1_CPDMA_OFFSET;
7482 dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET;
7483 ale_params.ale_regs = ss_regs + CPSW1_ALE_OFFSET;
7484@@ -1259,7 +1796,7 @@ static int cpsw_probe(struct platform_device *pdev)
7485 break;
7486 case CPSW_VERSION_2:
7487 priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
7488- priv->cpts.reg = ss_regs + CPSW2_CPTS_OFFSET;
7489+ priv->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET;
7490 dma_params.dmaregs = ss_regs + CPSW2_CPDMA_OFFSET;
7491 dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET;
7492 ale_params.ale_regs = ss_regs + CPSW2_ALE_OFFSET;
7493@@ -1341,12 +1878,13 @@ static int cpsw_probe(struct platform_device *pdev)
7494 goto clean_ale_ret;
7495 }
7496 priv->irqs_table[k] = i;
7497- priv->num_irqs = k;
7498+ priv->num_irqs = k + 1;
7499 }
7500 k++;
7501 }
7502
7503- ndev->flags |= IFF_ALLMULTI; /* see cpsw_ndo_change_rx_flags() */
7504+ priv->irq_enabled = true;
7505+ ndev->features |= NETIF_F_HW_VLAN_FILTER;
7506
7507 ndev->netdev_ops = &cpsw_netdev_ops;
7508 SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
7509@@ -1361,17 +1899,26 @@ static int cpsw_probe(struct platform_device *pdev)
7510 goto clean_irq_ret;
7511 }
7512
7513- if (cpts_register(&pdev->dev, &priv->cpts,
7514+ if (cpts_register(&pdev->dev, priv->cpts,
7515 data->cpts_clock_mult, data->cpts_clock_shift))
7516 dev_err(priv->dev, "error registering cpts device\n");
7517
7518 cpsw_notice(priv, probe, "initialized device (regs %x, irq %d)\n",
7519 priv->cpsw_res->start, ndev->irq);
7520
7521+ if (priv->data.dual_emac) {
7522+ ret = cpsw_probe_dual_emac(pdev, priv);
7523+ if (ret) {
7524+ cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
7525+ goto clean_irq_ret;
7526+ }
7527+ }
7528+
7529 return 0;
7530
7531 clean_irq_ret:
7532- free_irq(ndev->irq, priv);
7533+ for (i = 0; i < priv->num_irqs; i++)
7534+ free_irq(priv->irqs_table[i], priv);
7535 clean_ale_ret:
7536 cpsw_ale_destroy(priv->ale);
7537 clean_dma_ret:
7538@@ -1394,7 +1941,8 @@ clean_slave_ret:
7539 pm_runtime_disable(&pdev->dev);
7540 kfree(priv->slaves);
7541 clean_ndev_ret:
7542- free_netdev(ndev);
7543+ kfree(priv->data.slave_data);
7544+ free_netdev(priv->ndev);
7545 return ret;
7546 }
7547
7548@@ -1402,12 +1950,17 @@ static int cpsw_remove(struct platform_device *pdev)
7549 {
7550 struct net_device *ndev = platform_get_drvdata(pdev);
7551 struct cpsw_priv *priv = netdev_priv(ndev);
7552+ int i;
7553
7554- pr_info("removing device");
7555 platform_set_drvdata(pdev, NULL);
7556+ if (priv->data.dual_emac)
7557+ unregister_netdev(cpsw_get_slave_ndev(priv, 1));
7558+ unregister_netdev(ndev);
7559+
7560+ cpts_unregister(priv->cpts);
7561+ for (i = 0; i < priv->num_irqs; i++)
7562+ free_irq(priv->irqs_table[i], priv);
7563
7564- cpts_unregister(&priv->cpts);
7565- free_irq(ndev->irq, priv);
7566 cpsw_ale_destroy(priv->ale);
7567 cpdma_chan_destroy(priv->txch);
7568 cpdma_chan_destroy(priv->rxch);
7569@@ -1421,8 +1974,10 @@ static int cpsw_remove(struct platform_device *pdev)
7570 pm_runtime_disable(&pdev->dev);
7571 clk_put(priv->clk);
7572 kfree(priv->slaves);
7573+ kfree(priv->data.slave_data);
7574+ if (priv->data.dual_emac)
7575+ free_netdev(cpsw_get_slave_ndev(priv, 1));
7576 free_netdev(ndev);
7577-
7578 return 0;
7579 }
7580
7581@@ -1458,6 +2013,7 @@ static const struct of_device_id cpsw_of_mtable[] = {
7582 { .compatible = "ti,cpsw", },
7583 { /* sentinel */ },
7584 };
7585+MODULE_DEVICE_TABLE(of, cpsw_of_mtable);
7586
7587 static struct platform_driver cpsw_driver = {
7588 .driver = {
7589diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
7590index 0e9ccc2..7fa60d6 100644
7591--- a/drivers/net/ethernet/ti/cpsw_ale.c
7592+++ b/drivers/net/ethernet/ti/cpsw_ale.c
7593@@ -148,7 +148,7 @@ static int cpsw_ale_write(struct cpsw_ale *ale, int idx, u32 *ale_entry)
7594 return idx;
7595 }
7596
7597-static int cpsw_ale_match_addr(struct cpsw_ale *ale, u8 *addr)
7598+int cpsw_ale_match_addr(struct cpsw_ale *ale, u8 *addr, u16 vid)
7599 {
7600 u32 ale_entry[ALE_ENTRY_WORDS];
7601 int type, idx;
7602@@ -160,6 +160,8 @@ static int cpsw_ale_match_addr(struct cpsw_ale *ale, u8 *addr)
7603 type = cpsw_ale_get_entry_type(ale_entry);
7604 if (type != ALE_TYPE_ADDR && type != ALE_TYPE_VLAN_ADDR)
7605 continue;
7606+ if (cpsw_ale_get_vlan_id(ale_entry) != vid)
7607+ continue;
7608 cpsw_ale_get_addr(ale_entry, entry_addr);
7609 if (memcmp(entry_addr, addr, 6) == 0)
7610 return idx;
7611@@ -167,6 +169,22 @@ static int cpsw_ale_match_addr(struct cpsw_ale *ale, u8 *addr)
7612 return -ENOENT;
7613 }
7614
7615+int cpsw_ale_match_vlan(struct cpsw_ale *ale, u16 vid)
7616+{
7617+ u32 ale_entry[ALE_ENTRY_WORDS];
7618+ int type, idx;
7619+
7620+ for (idx = 0; idx < ale->params.ale_entries; idx++) {
7621+ cpsw_ale_read(ale, idx, ale_entry);
7622+ type = cpsw_ale_get_entry_type(ale_entry);
7623+ if (type != ALE_TYPE_VLAN)
7624+ continue;
7625+ if (cpsw_ale_get_vlan_id(ale_entry) == vid)
7626+ return idx;
7627+ }
7628+ return -ENOENT;
7629+}
7630+
7631 static int cpsw_ale_match_free(struct cpsw_ale *ale)
7632 {
7633 u32 ale_entry[ALE_ENTRY_WORDS];
7634@@ -274,19 +292,32 @@ int cpsw_ale_flush(struct cpsw_ale *ale, int port_mask)
7635 return 0;
7636 }
7637
7638-int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags)
7639+static inline void cpsw_ale_set_vlan_entry_type(u32 *ale_entry,
7640+ int flags, u16 vid)
7641+{
7642+ if (flags & ALE_VLAN) {
7643+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_VLAN_ADDR);
7644+ cpsw_ale_set_vlan_id(ale_entry, vid);
7645+ } else {
7646+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_ADDR);
7647+ }
7648+}
7649+
7650+int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port,
7651+ int flags, u16 vid)
7652 {
7653 u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7654 int idx;
7655
7656- cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_ADDR);
7657+ cpsw_ale_set_vlan_entry_type(ale_entry, flags, vid);
7658+
7659 cpsw_ale_set_addr(ale_entry, addr);
7660 cpsw_ale_set_ucast_type(ale_entry, ALE_UCAST_PERSISTANT);
7661 cpsw_ale_set_secure(ale_entry, (flags & ALE_SECURE) ? 1 : 0);
7662 cpsw_ale_set_blocked(ale_entry, (flags & ALE_BLOCKED) ? 1 : 0);
7663 cpsw_ale_set_port_num(ale_entry, port);
7664
7665- idx = cpsw_ale_match_addr(ale, addr);
7666+ idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
7667 if (idx < 0)
7668 idx = cpsw_ale_match_free(ale);
7669 if (idx < 0)
7670@@ -298,12 +329,13 @@ int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags)
7671 return 0;
7672 }
7673
7674-int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port)
7675+int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port,
7676+ int flags, u16 vid)
7677 {
7678 u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7679 int idx;
7680
7681- idx = cpsw_ale_match_addr(ale, addr);
7682+ idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
7683 if (idx < 0)
7684 return -ENOENT;
7685
7686@@ -313,18 +345,19 @@ int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port)
7687 }
7688
7689 int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
7690- int super, int mcast_state)
7691+ int flags, u16 vid, int mcast_state)
7692 {
7693 u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7694 int idx, mask;
7695
7696- idx = cpsw_ale_match_addr(ale, addr);
7697+ idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
7698 if (idx >= 0)
7699 cpsw_ale_read(ale, idx, ale_entry);
7700
7701- cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_ADDR);
7702+ cpsw_ale_set_vlan_entry_type(ale_entry, flags, vid);
7703+
7704 cpsw_ale_set_addr(ale_entry, addr);
7705- cpsw_ale_set_super(ale_entry, super);
7706+ cpsw_ale_set_super(ale_entry, (flags & ALE_SUPER) ? 1 : 0);
7707 cpsw_ale_set_mcast_state(ale_entry, mcast_state);
7708
7709 mask = cpsw_ale_get_port_mask(ale_entry);
7710@@ -342,12 +375,13 @@ int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
7711 return 0;
7712 }
7713
7714-int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask)
7715+int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
7716+ int flags, u16 vid)
7717 {
7718 u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7719 int idx;
7720
7721- idx = cpsw_ale_match_addr(ale, addr);
7722+ idx = cpsw_ale_match_addr(ale, addr, (flags & ALE_VLAN) ? vid : 0);
7723 if (idx < 0)
7724 return -EINVAL;
7725
7726@@ -362,6 +396,55 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask)
7727 return 0;
7728 }
7729
7730+int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
7731+ int reg_mcast, int unreg_mcast)
7732+{
7733+ u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7734+ int idx;
7735+
7736+ idx = cpsw_ale_match_vlan(ale, vid);
7737+ if (idx >= 0)
7738+ cpsw_ale_read(ale, idx, ale_entry);
7739+
7740+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_VLAN);
7741+ cpsw_ale_set_vlan_id(ale_entry, vid);
7742+
7743+ cpsw_ale_set_vlan_untag_force(ale_entry, untag);
7744+ cpsw_ale_set_vlan_reg_mcast(ale_entry, reg_mcast);
7745+ cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast);
7746+ cpsw_ale_set_vlan_member_list(ale_entry, port);
7747+
7748+ if (idx < 0)
7749+ idx = cpsw_ale_match_free(ale);
7750+ if (idx < 0)
7751+ idx = cpsw_ale_find_ageable(ale);
7752+ if (idx < 0)
7753+ return -ENOMEM;
7754+
7755+ cpsw_ale_write(ale, idx, ale_entry);
7756+ return 0;
7757+}
7758+
7759+int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask)
7760+{
7761+ u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0};
7762+ int idx;
7763+
7764+ idx = cpsw_ale_match_vlan(ale, vid);
7765+ if (idx < 0)
7766+ return -ENOENT;
7767+
7768+ cpsw_ale_read(ale, idx, ale_entry);
7769+
7770+ if (port_mask)
7771+ cpsw_ale_set_vlan_member_list(ale_entry, port_mask);
7772+ else
7773+ cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE);
7774+
7775+ cpsw_ale_write(ale, idx, ale_entry);
7776+ return 0;
7777+}
7778+
7779 struct ale_control_info {
7780 const char *name;
7781 int offset, port_offset;
7782diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
7783index 2bd09cb..30daa12 100644
7784--- a/drivers/net/ethernet/ti/cpsw_ale.h
7785+++ b/drivers/net/ethernet/ti/cpsw_ale.h
7786@@ -64,8 +64,14 @@ enum cpsw_ale_port_state {
7787 };
7788
7789 /* ALE unicast entry flags - passed into cpsw_ale_add_ucast() */
7790-#define ALE_SECURE 1
7791-#define ALE_BLOCKED 2
7792+#define ALE_SECURE BIT(0)
7793+#define ALE_BLOCKED BIT(1)
7794+#define ALE_SUPER BIT(2)
7795+#define ALE_VLAN BIT(3)
7796+
7797+#define ALE_PORT_HOST BIT(0)
7798+#define ALE_PORT_1 BIT(1)
7799+#define ALE_PORT_2 BIT(2)
7800
7801 #define ALE_MCAST_FWD 0
7802 #define ALE_MCAST_BLOCK_LEARN_FWD 1
7803@@ -81,11 +87,17 @@ void cpsw_ale_stop(struct cpsw_ale *ale);
7804 int cpsw_ale_set_ageout(struct cpsw_ale *ale, int ageout);
7805 int cpsw_ale_flush(struct cpsw_ale *ale, int port_mask);
7806 int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask);
7807-int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags);
7808-int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port);
7809+int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port,
7810+ int flags, u16 vid);
7811+int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port,
7812+ int flags, u16 vid);
7813 int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
7814- int super, int mcast_state);
7815-int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask);
7816+ int flags, u16 vid, int mcast_state);
7817+int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
7818+ int flags, u16 vid);
7819+int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
7820+ int reg_mcast, int unreg_mcast);
7821+int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port);
7822
7823 int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control);
7824 int cpsw_ale_control_set(struct cpsw_ale *ale, int port,
7825diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
7826index 4995673..49dfd59 100644
7827--- a/drivers/net/ethernet/ti/davinci_cpdma.c
7828+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
7829@@ -20,6 +20,7 @@
7830 #include <linux/err.h>
7831 #include <linux/dma-mapping.h>
7832 #include <linux/io.h>
7833+#include <linux/delay.h>
7834
7835 #include "davinci_cpdma.h"
7836
7837@@ -60,6 +61,9 @@
7838 #define CPDMA_DESC_EOQ BIT(28)
7839 #define CPDMA_DESC_TD_COMPLETE BIT(27)
7840 #define CPDMA_DESC_PASS_CRC BIT(26)
7841+#define CPDMA_DESC_TO_PORT_EN BIT(20)
7842+#define CPDMA_TO_PORT_SHIFT 16
7843+#define CPDMA_DESC_PORT_MASK (BIT(18) | BIT(17) | BIT(16))
7844
7845 #define CPDMA_TEARDOWN_VALUE 0xfffffffc
7846
7847@@ -105,13 +109,13 @@ struct cpdma_ctlr {
7848 };
7849
7850 struct cpdma_chan {
7851+ struct cpdma_desc __iomem *head, *tail;
7852+ void __iomem *hdp, *cp, *rxfree;
7853 enum cpdma_state state;
7854 struct cpdma_ctlr *ctlr;
7855 int chan_num;
7856 spinlock_t lock;
7857- struct cpdma_desc __iomem *head, *tail;
7858 int count;
7859- void __iomem *hdp, *cp, *rxfree;
7860 u32 mask;
7861 cpdma_handler_fn handler;
7862 enum dma_data_direction dir;
7863@@ -132,6 +136,14 @@ struct cpdma_chan {
7864 #define chan_write(chan, fld, v) __raw_writel(v, (chan)->fld)
7865 #define desc_write(desc, fld, v) __raw_writel((u32)(v), &(desc)->fld)
7866
7867+#define cpdma_desc_to_port(chan, mode, directed) \
7868+ do { \
7869+ if (!is_rx_chan(chan) && ((directed == 1) || \
7870+ (directed == 2))) \
7871+ mode |= (CPDMA_DESC_TO_PORT_EN | \
7872+ (directed << CPDMA_TO_PORT_SHIFT)); \
7873+ } while (0)
7874+
7875 /*
7876 * Utility constructs for a cpdma descriptor pool. Some devices (e.g. davinci
7877 * emac) have dedicated on-chip memory for these descriptors. Some other
7878@@ -217,17 +229,27 @@ desc_from_phys(struct cpdma_desc_pool *pool, dma_addr_t dma)
7879 }
7880
7881 static struct cpdma_desc __iomem *
7882-cpdma_desc_alloc(struct cpdma_desc_pool *pool, int num_desc)
7883+cpdma_desc_alloc(struct cpdma_desc_pool *pool, int num_desc, bool is_rx)
7884 {
7885 unsigned long flags;
7886 int index;
7887+ int desc_start;
7888+ int desc_end;
7889 struct cpdma_desc __iomem *desc = NULL;
7890
7891 spin_lock_irqsave(&pool->lock, flags);
7892
7893- index = bitmap_find_next_zero_area(pool->bitmap, pool->num_desc, 0,
7894- num_desc, 0);
7895- if (index < pool->num_desc) {
7896+ if (is_rx) {
7897+ desc_start = 0;
7898+ desc_end = pool->num_desc/2;
7899+ } else {
7900+ desc_start = pool->num_desc/2;
7901+ desc_end = pool->num_desc;
7902+ }
7903+
7904+ index = bitmap_find_next_zero_area(pool->bitmap,
7905+ desc_end, desc_start, num_desc, 0);
7906+ if (index < desc_end) {
7907 bitmap_set(pool->bitmap, index, num_desc);
7908 desc = pool->iomap + pool->desc_size * index;
7909 pool->used_desc++;
7910@@ -291,14 +313,16 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
7911 }
7912
7913 if (ctlr->params.has_soft_reset) {
7914- unsigned long timeout = jiffies + HZ/10;
7915+ unsigned timeout = 10 * 100;
7916
7917 dma_reg_write(ctlr, CPDMA_SOFTRESET, 1);
7918- while (time_before(jiffies, timeout)) {
7919+ while (timeout) {
7920 if (dma_reg_read(ctlr, CPDMA_SOFTRESET) == 0)
7921 break;
7922+ udelay(10);
7923+ timeout--;
7924 }
7925- WARN_ON(!time_before(jiffies, timeout));
7926+ WARN_ON(!timeout);
7927 }
7928
7929 for (i = 0; i < ctlr->num_chan; i++) {
7930@@ -439,10 +463,8 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
7931 if (ctlr->state != CPDMA_STATE_IDLE)
7932 cpdma_ctlr_stop(ctlr);
7933
7934- for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
7935- if (ctlr->channels[i])
7936- cpdma_chan_destroy(ctlr->channels[i]);
7937- }
7938+ for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++)
7939+ cpdma_chan_destroy(ctlr->channels[i]);
7940
7941 cpdma_desc_pool_destroy(ctlr->pool);
7942 spin_unlock_irqrestore(&ctlr->lock, flags);
7943@@ -473,11 +495,13 @@ int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable)
7944 spin_unlock_irqrestore(&ctlr->lock, flags);
7945 return 0;
7946 }
7947+EXPORT_SYMBOL_GPL(cpdma_ctlr_int_ctrl);
7948
7949-void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr)
7950+void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value)
7951 {
7952- dma_reg_write(ctlr, CPDMA_MACEOIVECTOR, 0);
7953+ dma_reg_write(ctlr, CPDMA_MACEOIVECTOR, value);
7954 }
7955+EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi);
7956
7957 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
7958 cpdma_handler_fn handler)
7959@@ -652,7 +676,7 @@ static void __cpdma_chan_submit(struct cpdma_chan *chan,
7960 }
7961
7962 int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
7963- int len, gfp_t gfp_mask)
7964+ int len, int directed)
7965 {
7966 struct cpdma_ctlr *ctlr = chan->ctlr;
7967 struct cpdma_desc __iomem *desc;
7968@@ -668,7 +692,7 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
7969 goto unlock_ret;
7970 }
7971
7972- desc = cpdma_desc_alloc(ctlr->pool, 1);
7973+ desc = cpdma_desc_alloc(ctlr->pool, 1, is_rx_chan(chan));
7974 if (!desc) {
7975 chan->stats.desc_alloc_fail++;
7976 ret = -ENOMEM;
7977@@ -682,6 +706,7 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
7978
7979 buffer = dma_map_single(ctlr->dev, data, len, chan->dir);
7980 mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
7981+ cpdma_desc_to_port(chan, mode, directed);
7982
7983 desc_write(desc, hw_next, 0);
7984 desc_write(desc, hw_buffer, buffer);
7985@@ -704,6 +729,29 @@ unlock_ret:
7986 }
7987 EXPORT_SYMBOL_GPL(cpdma_chan_submit);
7988
7989+bool cpdma_check_free_tx_desc(struct cpdma_chan *chan)
7990+{
7991+ unsigned long flags;
7992+ int index;
7993+ bool ret;
7994+ struct cpdma_ctlr *ctlr = chan->ctlr;
7995+ struct cpdma_desc_pool *pool = ctlr->pool;
7996+
7997+ spin_lock_irqsave(&pool->lock, flags);
7998+
7999+ index = bitmap_find_next_zero_area(pool->bitmap,
8000+ pool->num_desc, pool->num_desc/2, 1, 0);
8001+
8002+ if (index < pool->num_desc)
8003+ ret = true;
8004+ else
8005+ ret = false;
8006+
8007+ spin_unlock_irqrestore(&pool->lock, flags);
8008+ return ret;
8009+}
8010+EXPORT_SYMBOL_GPL(cpdma_check_free_tx_desc);
8011+
8012 static void __cpdma_chan_free(struct cpdma_chan *chan,
8013 struct cpdma_desc __iomem *desc,
8014 int outlen, int status)
8015@@ -728,6 +776,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
8016 struct cpdma_ctlr *ctlr = chan->ctlr;
8017 struct cpdma_desc __iomem *desc;
8018 int status, outlen;
8019+ int cb_status = 0;
8020 struct cpdma_desc_pool *pool = ctlr->pool;
8021 dma_addr_t desc_dma;
8022 unsigned long flags;
8023@@ -749,7 +798,8 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
8024 status = -EBUSY;
8025 goto unlock_ret;
8026 }
8027- status = status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE);
8028+ status = status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE |
8029+ CPDMA_DESC_PORT_MASK);
8030
8031 chan->head = desc_from_phys(pool, desc_read(desc, hw_next));
8032 chan_write(chan, cp, desc_dma);
8033@@ -762,8 +812,12 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
8034 }
8035
8036 spin_unlock_irqrestore(&chan->lock, flags);
8037+ if (unlikely(status & CPDMA_DESC_TD_COMPLETE))
8038+ cb_status = -ENOSYS;
8039+ else
8040+ cb_status = status;
8041
8042- __cpdma_chan_free(chan, desc, outlen, status);
8043+ __cpdma_chan_free(chan, desc, outlen, cb_status);
8044 return status;
8045
8046 unlock_ret:
8047@@ -822,7 +876,7 @@ int cpdma_chan_stop(struct cpdma_chan *chan)
8048 struct cpdma_desc_pool *pool = ctlr->pool;
8049 unsigned long flags;
8050 int ret;
8051- unsigned long timeout;
8052+ unsigned timeout;
8053
8054 spin_lock_irqsave(&chan->lock, flags);
8055 if (chan->state != CPDMA_STATE_ACTIVE) {
8056@@ -837,14 +891,15 @@ int cpdma_chan_stop(struct cpdma_chan *chan)
8057 dma_reg_write(ctlr, chan->td, chan_linear(chan));
8058
8059 /* wait for teardown complete */
8060- timeout = jiffies + HZ/10; /* 100 msec */
8061- while (time_before(jiffies, timeout)) {
8062+ timeout = 100 * 100; /* 100 ms */
8063+ while (timeout) {
8064 u32 cp = chan_read(chan, cp);
8065 if ((cp & CPDMA_TEARDOWN_VALUE) == CPDMA_TEARDOWN_VALUE)
8066 break;
8067- cpu_relax();
8068+ udelay(10);
8069+ timeout--;
8070 }
8071- WARN_ON(!time_before(jiffies, timeout));
8072+ WARN_ON(!timeout);
8073 chan_write(chan, cp, CPDMA_TEARDOWN_VALUE);
8074
8075 /* handle completed packets */
8076@@ -984,3 +1039,6 @@ unlock_ret:
8077 spin_unlock_irqrestore(&ctlr->lock, flags);
8078 return ret;
8079 }
8080+EXPORT_SYMBOL_GPL(cpdma_control_set);
8081+
8082+MODULE_LICENSE("GPL");
8083diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
8084index afa19a0..86dee48 100644
8085--- a/drivers/net/ethernet/ti/davinci_cpdma.h
8086+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
8087@@ -24,6 +24,13 @@
8088 #define __chan_linear(chan_num) ((chan_num) & (CPDMA_MAX_CHANNELS - 1))
8089 #define chan_linear(chan) __chan_linear((chan)->chan_num)
8090
8091+#define CPDMA_RX_SOURCE_PORT(__status__) ((__status__ >> 16) & 0x7)
8092+
8093+#define CPDMA_EOI_RX_THRESH 0x0
8094+#define CPDMA_EOI_RX 0x1
8095+#define CPDMA_EOI_TX 0x2
8096+#define CPDMA_EOI_MISC 0x3
8097+
8098 struct cpdma_params {
8099 struct device *dev;
8100 void __iomem *dmaregs;
8101@@ -82,12 +89,13 @@ int cpdma_chan_dump(struct cpdma_chan *chan);
8102 int cpdma_chan_get_stats(struct cpdma_chan *chan,
8103 struct cpdma_chan_stats *stats);
8104 int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
8105- int len, gfp_t gfp_mask);
8106+ int len, int directed);
8107 int cpdma_chan_process(struct cpdma_chan *chan, int quota);
8108
8109 int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable);
8110-void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr);
8111+void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value);
8112 int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable);
8113+bool cpdma_check_free_tx_desc(struct cpdma_chan *chan);
8114
8115 enum cpdma_control {
8116 CPDMA_CMD_IDLE, /* write-only */
8117diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
8118index 4ebcb24..5aa9e4d 100644
8119--- a/drivers/net/ethernet/ti/davinci_emac.c
8120+++ b/drivers/net/ethernet/ti/davinci_emac.c
8121@@ -120,7 +120,6 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
8122 #define EMAC_DEF_TX_CH (0) /* Default 0th channel */
8123 #define EMAC_DEF_RX_CH (0) /* Default 0th channel */
8124 #define EMAC_DEF_RX_NUM_DESC (128)
8125-#define EMAC_DEF_TX_NUM_DESC (128)
8126 #define EMAC_DEF_MAX_TX_CH (1) /* Max TX channels configured */
8127 #define EMAC_DEF_MAX_RX_CH (1) /* Max RX channels configured */
8128 #define EMAC_POLL_WEIGHT (64) /* Default NAPI poll weight */
8129@@ -342,7 +341,6 @@ struct emac_priv {
8130 u32 mac_hash2;
8131 u32 multicast_hash_cnt[EMAC_NUM_MULTICAST_BITS];
8132 u32 rx_addr_type;
8133- atomic_t cur_tx;
8134 const char *phy_id;
8135 #ifdef CONFIG_OF
8136 struct device_node *phy_node;
8137@@ -480,8 +478,8 @@ static void emac_dump_regs(struct emac_priv *priv)
8138 static void emac_get_drvinfo(struct net_device *ndev,
8139 struct ethtool_drvinfo *info)
8140 {
8141- strcpy(info->driver, emac_version_string);
8142- strcpy(info->version, EMAC_MODULE_VERSION);
8143+ strlcpy(info->driver, emac_version_string, sizeof(info->driver));
8144+ strlcpy(info->version, EMAC_MODULE_VERSION, sizeof(info->version));
8145 }
8146
8147 /**
8148@@ -1039,7 +1037,7 @@ static void emac_rx_handler(void *token, int len, int status)
8149
8150 recycle:
8151 ret = cpdma_chan_submit(priv->rxchan, skb, skb->data,
8152- skb_tailroom(skb), GFP_KERNEL);
8153+ skb_tailroom(skb), 0);
8154
8155 WARN_ON(ret == -ENOMEM);
8156 if (unlikely(ret < 0))
8157@@ -1050,10 +1048,10 @@ static void emac_tx_handler(void *token, int len, int status)
8158 {
8159 struct sk_buff *skb = token;
8160 struct net_device *ndev = skb->dev;
8161- struct emac_priv *priv = netdev_priv(ndev);
8162-
8163- atomic_dec(&priv->cur_tx);
8164
8165+ /* Check whether the queue is stopped due to stalled tx dma, if the
8166+ * queue is stopped then start the queue as we have free desc for tx
8167+ */
8168 if (unlikely(netif_queue_stopped(ndev)))
8169 netif_wake_queue(ndev);
8170 ndev->stats.tx_packets++;
8171@@ -1094,14 +1092,17 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev)
8172 skb_tx_timestamp(skb);
8173
8174 ret_code = cpdma_chan_submit(priv->txchan, skb, skb->data, skb->len,
8175- GFP_KERNEL);
8176+ 0);
8177 if (unlikely(ret_code != 0)) {
8178 if (netif_msg_tx_err(priv) && net_ratelimit())
8179 dev_err(emac_dev, "DaVinci EMAC: desc submit failed");
8180 goto fail_tx;
8181 }
8182
8183- if (atomic_inc_return(&priv->cur_tx) >= EMAC_DEF_TX_NUM_DESC)
8184+ /* If there is no more tx desc left free then we need to
8185+ * tell the kernel to stop sending us tx frames.
8186+ */
8187+ if (unlikely(!cpdma_check_free_tx_desc(priv->txchan)))
8188 netif_stop_queue(ndev);
8189
8190 return NETDEV_TX_OK;
8191@@ -1264,7 +1265,6 @@ static int emac_dev_setmac_addr(struct net_device *ndev, void *addr)
8192 /* Store mac addr in priv and rx channel and set it in EMAC hw */
8193 memcpy(priv->mac_addr, sa->sa_data, ndev->addr_len);
8194 memcpy(ndev->dev_addr, sa->sa_data, ndev->addr_len);
8195- ndev->addr_assign_type &= ~NET_ADDR_RANDOM;
8196
8197 /* MAC address is configured only after the interface is enabled. */
8198 if (netif_running(ndev)) {
8199@@ -1438,7 +1438,7 @@ static int emac_poll(struct napi_struct *napi, int budget)
8200 * Polled functionality used by netconsole and others in non interrupt mode
8201 *
8202 */
8203-void emac_poll_controller(struct net_device *ndev)
8204+static void emac_poll_controller(struct net_device *ndev)
8205 {
8206 struct emac_priv *priv = netdev_priv(ndev);
8207
8208@@ -1558,7 +1558,7 @@ static int emac_dev_open(struct net_device *ndev)
8209 break;
8210
8211 ret = cpdma_chan_submit(priv->rxchan, skb, skb->data,
8212- skb_tailroom(skb), GFP_KERNEL);
8213+ skb_tailroom(skb), 0);
8214 if (WARN_ON(ret < 0))
8215 break;
8216 }
8217@@ -1865,21 +1865,18 @@ static int davinci_emac_probe(struct platform_device *pdev)
8218
8219
8220 /* obtain emac clock from kernel */
8221- emac_clk = clk_get(&pdev->dev, NULL);
8222+ emac_clk = devm_clk_get(&pdev->dev, NULL);
8223 if (IS_ERR(emac_clk)) {
8224 dev_err(&pdev->dev, "failed to get EMAC clock\n");
8225 return -EBUSY;
8226 }
8227 emac_bus_frequency = clk_get_rate(emac_clk);
8228- clk_put(emac_clk);
8229
8230 /* TODO: Probe PHY here if possible */
8231
8232 ndev = alloc_etherdev(sizeof(struct emac_priv));
8233- if (!ndev) {
8234- rc = -ENOMEM;
8235- goto no_ndev;
8236- }
8237+ if (!ndev)
8238+ return -ENOMEM;
8239
8240 platform_set_drvdata(pdev, ndev);
8241 priv = netdev_priv(ndev);
8242@@ -1893,7 +1890,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
8243 if (!pdata) {
8244 dev_err(&pdev->dev, "no platform data\n");
8245 rc = -ENODEV;
8246- goto probe_quit;
8247+ goto no_pdata;
8248 }
8249
8250 /* MAC addr and PHY mask , RMII enable info from platform_data */
8251@@ -1913,23 +1910,23 @@ static int davinci_emac_probe(struct platform_device *pdev)
8252 if (!res) {
8253 dev_err(&pdev->dev,"error getting res\n");
8254 rc = -ENOENT;
8255- goto probe_quit;
8256+ goto no_pdata;
8257 }
8258
8259 priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
8260 size = resource_size(res);
8261- if (!request_mem_region(res->start, size, ndev->name)) {
8262+ if (!devm_request_mem_region(&pdev->dev, res->start,
8263+ size, ndev->name)) {
8264 dev_err(&pdev->dev, "failed request_mem_region() for regs\n");
8265 rc = -ENXIO;
8266- goto probe_quit;
8267+ goto no_pdata;
8268 }
8269
8270- priv->remap_addr = ioremap(res->start, size);
8271+ priv->remap_addr = devm_ioremap(&pdev->dev, res->start, size);
8272 if (!priv->remap_addr) {
8273 dev_err(&pdev->dev, "unable to map IO\n");
8274 rc = -ENOMEM;
8275- release_mem_region(res->start, size);
8276- goto probe_quit;
8277+ goto no_pdata;
8278 }
8279 priv->emac_base = priv->remap_addr + pdata->ctrl_reg_offset;
8280 ndev->base_addr = (unsigned long)priv->remap_addr;
8281@@ -1962,7 +1959,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
8282 if (!priv->dma) {
8283 dev_err(&pdev->dev, "error initializing DMA\n");
8284 rc = -ENOMEM;
8285- goto no_dma;
8286+ goto no_pdata;
8287 }
8288
8289 priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH),
8290@@ -1971,14 +1968,14 @@ static int davinci_emac_probe(struct platform_device *pdev)
8291 emac_rx_handler);
8292 if (WARN_ON(!priv->txchan || !priv->rxchan)) {
8293 rc = -ENOMEM;
8294- goto no_irq_res;
8295+ goto no_cpdma_chan;
8296 }
8297
8298 res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
8299 if (!res) {
8300 dev_err(&pdev->dev, "error getting irq res\n");
8301 rc = -ENOENT;
8302- goto no_irq_res;
8303+ goto no_cpdma_chan;
8304 }
8305 ndev->irq = res->start;
8306
8307@@ -2000,7 +1997,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
8308 if (rc) {
8309 dev_err(&pdev->dev, "error in register_netdev\n");
8310 rc = -ENODEV;
8311- goto no_irq_res;
8312+ goto no_cpdma_chan;
8313 }
8314
8315
8316@@ -2015,20 +2012,14 @@ static int davinci_emac_probe(struct platform_device *pdev)
8317
8318 return 0;
8319
8320-no_irq_res:
8321+no_cpdma_chan:
8322 if (priv->txchan)
8323 cpdma_chan_destroy(priv->txchan);
8324 if (priv->rxchan)
8325 cpdma_chan_destroy(priv->rxchan);
8326 cpdma_ctlr_destroy(priv->dma);
8327-no_dma:
8328- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
8329- release_mem_region(res->start, resource_size(res));
8330- iounmap(priv->remap_addr);
8331-
8332-probe_quit:
8333+no_pdata:
8334 free_netdev(ndev);
8335-no_ndev:
8336 return rc;
8337 }
8338
8339@@ -2041,14 +2032,12 @@ no_ndev:
8340 */
8341 static int davinci_emac_remove(struct platform_device *pdev)
8342 {
8343- struct resource *res;
8344 struct net_device *ndev = platform_get_drvdata(pdev);
8345 struct emac_priv *priv = netdev_priv(ndev);
8346
8347 dev_notice(&ndev->dev, "DaVinci EMAC: davinci_emac_remove()\n");
8348
8349 platform_set_drvdata(pdev, NULL);
8350- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
8351
8352 if (priv->txchan)
8353 cpdma_chan_destroy(priv->txchan);
8354@@ -2056,10 +2045,7 @@ static int davinci_emac_remove(struct platform_device *pdev)
8355 cpdma_chan_destroy(priv->rxchan);
8356 cpdma_ctlr_destroy(priv->dma);
8357
8358- release_mem_region(res->start, resource_size(res));
8359-
8360 unregister_netdev(ndev);
8361- iounmap(priv->remap_addr);
8362 free_netdev(ndev);
8363
8364 return 0;
8365diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
8366index cca2550..12aec17 100644
8367--- a/drivers/net/ethernet/ti/davinci_mdio.c
8368+++ b/drivers/net/ethernet/ti/davinci_mdio.c
8369@@ -320,10 +320,8 @@ static int davinci_mdio_probe(struct platform_device *pdev)
8370 int ret, addr;
8371
8372 data = kzalloc(sizeof(*data), GFP_KERNEL);
8373- if (!data) {
8374- dev_err(dev, "failed to alloc device data\n");
8375+ if (!data)
8376 return -ENOMEM;
8377- }
8378
8379 data->bus = mdiobus_alloc();
8380 if (!data->bus) {
8381@@ -487,6 +485,7 @@ static const struct of_device_id davinci_mdio_of_mtable[] = {
8382 { .compatible = "ti,davinci_mdio", },
8383 { /* sentinel */ },
8384 };
8385+MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
8386
8387 static struct platform_driver davinci_mdio_driver = {
8388 .driver = {
8389diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
8390index 2272538..60c400f 100644
8391--- a/drivers/net/ethernet/ti/tlan.c
8392+++ b/drivers/net/ethernet/ti/tlan.c
8393@@ -320,6 +320,7 @@ static void tlan_remove_one(struct pci_dev *pdev)
8394 free_netdev(dev);
8395
8396 pci_set_drvdata(pdev, NULL);
8397+ cancel_work_sync(&priv->tlan_tqueue);
8398 }
8399
8400 static void tlan_start(struct net_device *dev)
8401@@ -1911,10 +1912,8 @@ static void tlan_reset_lists(struct net_device *dev)
8402 list->frame_size = TLAN_MAX_FRAME_SIZE;
8403 list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER;
8404 skb = netdev_alloc_skb_ip_align(dev, TLAN_MAX_FRAME_SIZE + 5);
8405- if (!skb) {
8406- netdev_err(dev, "Out of memory for received data\n");
8407+ if (!skb)
8408 break;
8409- }
8410
8411 list->buffer[0].address = pci_map_single(priv->pci_dev,
8412 skb->data,
8413diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
8414index d8b9b1e..7ae06a7 100644
8415--- a/drivers/net/rionet.c
8416+++ b/drivers/net/rionet.c
8417@@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
8418 unsigned long flags;
8419 int add_num = 1;
8420
8421- local_irq_save(flags);
8422- if (!spin_trylock(&rnet->tx_lock)) {
8423- local_irq_restore(flags);
8424- return NETDEV_TX_LOCKED;
8425- }
8426+ spin_lock_irqsave(&rnet->tx_lock, flags);
8427
8428 if (is_multicast_ether_addr(eth->h_dest))
8429 add_num = nets[rnet->mport->id].nact;
8430diff --git a/drivers/of/base.c b/drivers/of/base.c
8431index 2390ddb..ec2fd1f 100644
8432--- a/drivers/of/base.c
8433+++ b/drivers/of/base.c
8434@@ -55,7 +55,7 @@ static DEFINE_MUTEX(of_aliases_mutex);
8435 /* use when traversing tree through the allnext, child, sibling,
8436 * or parent members of struct device_node.
8437 */
8438-DEFINE_RWLOCK(devtree_lock);
8439+DEFINE_RAW_SPINLOCK(devtree_lock);
8440
8441 int of_n_addr_cells(struct device_node *np)
8442 {
8443@@ -164,16 +164,14 @@ void of_node_put(struct device_node *node)
8444 EXPORT_SYMBOL(of_node_put);
8445 #endif /* CONFIG_OF_DYNAMIC */
8446
8447-struct property *of_find_property(const struct device_node *np,
8448- const char *name,
8449- int *lenp)
8450+static struct property *__of_find_property(const struct device_node *np,
8451+ const char *name, int *lenp)
8452 {
8453 struct property *pp;
8454
8455 if (!np)
8456 return NULL;
8457
8458- read_lock(&devtree_lock);
8459 for (pp = np->properties; pp; pp = pp->next) {
8460 if (of_prop_cmp(pp->name, name) == 0) {
8461 if (lenp)
8462@@ -181,7 +179,20 @@ struct property *of_find_property(const struct device_node *np,
8463 break;
8464 }
8465 }
8466- read_unlock(&devtree_lock);
8467+
8468+ return pp;
8469+}
8470+
8471+struct property *of_find_property(const struct device_node *np,
8472+ const char *name,
8473+ int *lenp)
8474+{
8475+ struct property *pp;
8476+ unsigned long flags;
8477+
8478+ raw_spin_lock_irqsave(&devtree_lock, flags);
8479+ pp = __of_find_property(np, name, lenp);
8480+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8481
8482 return pp;
8483 }
8484@@ -199,13 +210,13 @@ struct device_node *of_find_all_nodes(struct device_node *prev)
8485 {
8486 struct device_node *np;
8487
8488- read_lock(&devtree_lock);
8489+ raw_spin_lock(&devtree_lock);
8490 np = prev ? prev->allnext : of_allnodes;
8491 for (; np != NULL; np = np->allnext)
8492 if (of_node_get(np))
8493 break;
8494 of_node_put(prev);
8495- read_unlock(&devtree_lock);
8496+ raw_spin_unlock(&devtree_lock);
8497 return np;
8498 }
8499 EXPORT_SYMBOL(of_find_all_nodes);
8500@@ -214,8 +225,20 @@ EXPORT_SYMBOL(of_find_all_nodes);
8501 * Find a property with a given name for a given node
8502 * and return the value.
8503 */
8504+static const void *__of_get_property(const struct device_node *np,
8505+ const char *name, int *lenp)
8506+{
8507+ struct property *pp = __of_find_property(np, name, lenp);
8508+
8509+ return pp ? pp->value : NULL;
8510+}
8511+
8512+/*
8513+ * Find a property with a given name for a given node
8514+ * and return the value.
8515+ */
8516 const void *of_get_property(const struct device_node *np, const char *name,
8517- int *lenp)
8518+ int *lenp)
8519 {
8520 struct property *pp = of_find_property(np, name, lenp);
8521
8522@@ -226,13 +249,13 @@ EXPORT_SYMBOL(of_get_property);
8523 /** Checks if the given "compat" string matches one of the strings in
8524 * the device's "compatible" property
8525 */
8526-int of_device_is_compatible(const struct device_node *device,
8527- const char *compat)
8528+static int __of_device_is_compatible(const struct device_node *device,
8529+ const char *compat)
8530 {
8531 const char* cp;
8532 int cplen, l;
8533
8534- cp = of_get_property(device, "compatible", &cplen);
8535+ cp = __of_get_property(device, "compatible", &cplen);
8536 if (cp == NULL)
8537 return 0;
8538 while (cplen > 0) {
8539@@ -245,6 +268,21 @@ int of_device_is_compatible(const struct device_node *device,
8540
8541 return 0;
8542 }
8543+
8544+/** Checks if the given "compat" string matches one of the strings in
8545+ * the device's "compatible" property
8546+ */
8547+int of_device_is_compatible(const struct device_node *device,
8548+ const char *compat)
8549+{
8550+ unsigned long flags;
8551+ int res;
8552+
8553+ raw_spin_lock_irqsave(&devtree_lock, flags);
8554+ res = __of_device_is_compatible(device, compat);
8555+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8556+ return res;
8557+}
8558 EXPORT_SYMBOL(of_device_is_compatible);
8559
8560 /**
8561@@ -269,19 +307,19 @@ int of_machine_is_compatible(const char *compat)
8562 EXPORT_SYMBOL(of_machine_is_compatible);
8563
8564 /**
8565- * of_device_is_available - check if a device is available for use
8566+ * __of_device_is_available - check if a device is available for use
8567 *
8568- * @device: Node to check for availability
8569+ * @device: Node to check for availability, with locks already held
8570 *
8571 * Returns 1 if the status property is absent or set to "okay" or "ok",
8572 * 0 otherwise
8573 */
8574-int of_device_is_available(const struct device_node *device)
8575+static int __of_device_is_available(const struct device_node *device)
8576 {
8577 const char *status;
8578 int statlen;
8579
8580- status = of_get_property(device, "status", &statlen);
8581+ status = __of_get_property(device, "status", &statlen);
8582 if (status == NULL)
8583 return 1;
8584
8585@@ -292,6 +330,26 @@ int of_device_is_available(const struct device_node *device)
8586
8587 return 0;
8588 }
8589+
8590+/**
8591+ * of_device_is_available - check if a device is available for use
8592+ *
8593+ * @device: Node to check for availability
8594+ *
8595+ * Returns 1 if the status property is absent or set to "okay" or "ok",
8596+ * 0 otherwise
8597+ */
8598+int of_device_is_available(const struct device_node *device)
8599+{
8600+ unsigned long flags;
8601+ int res;
8602+
8603+ raw_spin_lock_irqsave(&devtree_lock, flags);
8604+ res = __of_device_is_available(device);
8605+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8606+ return res;
8607+
8608+}
8609 EXPORT_SYMBOL(of_device_is_available);
8610
8611 /**
8612@@ -304,13 +362,14 @@ EXPORT_SYMBOL(of_device_is_available);
8613 struct device_node *of_get_parent(const struct device_node *node)
8614 {
8615 struct device_node *np;
8616+ unsigned long flags;
8617
8618 if (!node)
8619 return NULL;
8620
8621- read_lock(&devtree_lock);
8622+ raw_spin_lock_irqsave(&devtree_lock, flags);
8623 np = of_node_get(node->parent);
8624- read_unlock(&devtree_lock);
8625+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8626 return np;
8627 }
8628 EXPORT_SYMBOL(of_get_parent);
8629@@ -329,14 +388,15 @@ EXPORT_SYMBOL(of_get_parent);
8630 struct device_node *of_get_next_parent(struct device_node *node)
8631 {
8632 struct device_node *parent;
8633+ unsigned long flags;
8634
8635 if (!node)
8636 return NULL;
8637
8638- read_lock(&devtree_lock);
8639+ raw_spin_lock_irqsave(&devtree_lock, flags);
8640 parent = of_node_get(node->parent);
8641 of_node_put(node);
8642- read_unlock(&devtree_lock);
8643+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8644 return parent;
8645 }
8646
8647@@ -352,14 +412,15 @@ struct device_node *of_get_next_child(const struct device_node *node,
8648 struct device_node *prev)
8649 {
8650 struct device_node *next;
8651+ unsigned long flags;
8652
8653- read_lock(&devtree_lock);
8654+ raw_spin_lock_irqsave(&devtree_lock, flags);
8655 next = prev ? prev->sibling : node->child;
8656 for (; next; next = next->sibling)
8657 if (of_node_get(next))
8658 break;
8659 of_node_put(prev);
8660- read_unlock(&devtree_lock);
8661+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8662 return next;
8663 }
8664 EXPORT_SYMBOL(of_get_next_child);
8665@@ -377,16 +438,16 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
8666 {
8667 struct device_node *next;
8668
8669- read_lock(&devtree_lock);
8670+ raw_spin_lock(&devtree_lock);
8671 next = prev ? prev->sibling : node->child;
8672 for (; next; next = next->sibling) {
8673- if (!of_device_is_available(next))
8674+ if (!__of_device_is_available(next))
8675 continue;
8676 if (of_node_get(next))
8677 break;
8678 }
8679 of_node_put(prev);
8680- read_unlock(&devtree_lock);
8681+ raw_spin_unlock(&devtree_lock);
8682 return next;
8683 }
8684 EXPORT_SYMBOL(of_get_next_available_child);
8685@@ -424,14 +485,15 @@ EXPORT_SYMBOL(of_get_child_by_name);
8686 struct device_node *of_find_node_by_path(const char *path)
8687 {
8688 struct device_node *np = of_allnodes;
8689+ unsigned long flags;
8690
8691- read_lock(&devtree_lock);
8692+ raw_spin_lock_irqsave(&devtree_lock, flags);
8693 for (; np; np = np->allnext) {
8694 if (np->full_name && (of_node_cmp(np->full_name, path) == 0)
8695 && of_node_get(np))
8696 break;
8697 }
8698- read_unlock(&devtree_lock);
8699+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8700 return np;
8701 }
8702 EXPORT_SYMBOL(of_find_node_by_path);
8703@@ -451,15 +513,16 @@ struct device_node *of_find_node_by_name(struct device_node *from,
8704 const char *name)
8705 {
8706 struct device_node *np;
8707+ unsigned long flags;
8708
8709- read_lock(&devtree_lock);
8710+ raw_spin_lock_irqsave(&devtree_lock, flags);
8711 np = from ? from->allnext : of_allnodes;
8712 for (; np; np = np->allnext)
8713 if (np->name && (of_node_cmp(np->name, name) == 0)
8714 && of_node_get(np))
8715 break;
8716 of_node_put(from);
8717- read_unlock(&devtree_lock);
8718+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8719 return np;
8720 }
8721 EXPORT_SYMBOL(of_find_node_by_name);
8722@@ -480,15 +543,16 @@ struct device_node *of_find_node_by_type(struct device_node *from,
8723 const char *type)
8724 {
8725 struct device_node *np;
8726+ unsigned long flags;
8727
8728- read_lock(&devtree_lock);
8729+ raw_spin_lock_irqsave(&devtree_lock, flags);
8730 np = from ? from->allnext : of_allnodes;
8731 for (; np; np = np->allnext)
8732 if (np->type && (of_node_cmp(np->type, type) == 0)
8733 && of_node_get(np))
8734 break;
8735 of_node_put(from);
8736- read_unlock(&devtree_lock);
8737+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8738 return np;
8739 }
8740 EXPORT_SYMBOL(of_find_node_by_type);
8741@@ -511,18 +575,20 @@ struct device_node *of_find_compatible_node(struct device_node *from,
8742 const char *type, const char *compatible)
8743 {
8744 struct device_node *np;
8745+ unsigned long flags;
8746
8747- read_lock(&devtree_lock);
8748+ raw_spin_lock_irqsave(&devtree_lock, flags);
8749 np = from ? from->allnext : of_allnodes;
8750 for (; np; np = np->allnext) {
8751 if (type
8752 && !(np->type && (of_node_cmp(np->type, type) == 0)))
8753 continue;
8754- if (of_device_is_compatible(np, compatible) && of_node_get(np))
8755+ if (__of_device_is_compatible(np, compatible) &&
8756+ of_node_get(np))
8757 break;
8758 }
8759 of_node_put(from);
8760- read_unlock(&devtree_lock);
8761+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8762 return np;
8763 }
8764 EXPORT_SYMBOL(of_find_compatible_node);
8765@@ -544,8 +610,9 @@ struct device_node *of_find_node_with_property(struct device_node *from,
8766 {
8767 struct device_node *np;
8768 struct property *pp;
8769+ unsigned long flags;
8770
8771- read_lock(&devtree_lock);
8772+ raw_spin_lock_irqsave(&devtree_lock, flags);
8773 np = from ? from->allnext : of_allnodes;
8774 for (; np; np = np->allnext) {
8775 for (pp = np->properties; pp; pp = pp->next) {
8776@@ -557,20 +624,14 @@ struct device_node *of_find_node_with_property(struct device_node *from,
8777 }
8778 out:
8779 of_node_put(from);
8780- read_unlock(&devtree_lock);
8781+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8782 return np;
8783 }
8784 EXPORT_SYMBOL(of_find_node_with_property);
8785
8786-/**
8787- * of_match_node - Tell if an device_node has a matching of_match structure
8788- * @matches: array of of device match structures to search in
8789- * @node: the of device structure to match against
8790- *
8791- * Low level utility function used by device matching.
8792- */
8793-const struct of_device_id *of_match_node(const struct of_device_id *matches,
8794- const struct device_node *node)
8795+static
8796+const struct of_device_id *__of_match_node(const struct of_device_id *matches,
8797+ const struct device_node *node)
8798 {
8799 if (!matches)
8800 return NULL;
8801@@ -584,14 +645,33 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches,
8802 match &= node->type
8803 && !strcmp(matches->type, node->type);
8804 if (matches->compatible[0])
8805- match &= of_device_is_compatible(node,
8806- matches->compatible);
8807+ match &= __of_device_is_compatible(node,
8808+ matches->compatible);
8809 if (match)
8810 return matches;
8811 matches++;
8812 }
8813 return NULL;
8814 }
8815+
8816+/**
8817+ * of_match_node - Tell if an device_node has a matching of_match structure
8818+ * @matches: array of of device match structures to search in
8819+ * @node: the of device structure to match against
8820+ *
8821+ * Low level utility function used by device matching.
8822+ */
8823+const struct of_device_id *of_match_node(const struct of_device_id *matches,
8824+ const struct device_node *node)
8825+{
8826+ const struct of_device_id *match;
8827+ unsigned long flags;
8828+
8829+ raw_spin_lock_irqsave(&devtree_lock, flags);
8830+ match = __of_match_node(matches, node);
8831+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8832+ return match;
8833+}
8834 EXPORT_SYMBOL(of_match_node);
8835
8836 /**
8837@@ -612,21 +692,22 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
8838 const struct of_device_id **match)
8839 {
8840 struct device_node *np;
8841+ unsigned long flags;
8842
8843 if (match)
8844 *match = NULL;
8845
8846- read_lock(&devtree_lock);
8847+ raw_spin_lock_irqsave(&devtree_lock, flags);
8848 np = from ? from->allnext : of_allnodes;
8849 for (; np; np = np->allnext) {
8850- if (of_match_node(matches, np) && of_node_get(np)) {
8851+ if (__of_match_node(matches, np) && of_node_get(np)) {
8852 if (match)
8853 *match = matches;
8854 break;
8855 }
8856 }
8857 of_node_put(from);
8858- read_unlock(&devtree_lock);
8859+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8860 return np;
8861 }
8862 EXPORT_SYMBOL(of_find_matching_node_and_match);
8863@@ -669,12 +750,12 @@ struct device_node *of_find_node_by_phandle(phandle handle)
8864 {
8865 struct device_node *np;
8866
8867- read_lock(&devtree_lock);
8868+ raw_spin_lock(&devtree_lock);
8869 for (np = of_allnodes; np; np = np->allnext)
8870 if (np->phandle == handle)
8871 break;
8872 of_node_get(np);
8873- read_unlock(&devtree_lock);
8874+ raw_spin_unlock(&devtree_lock);
8875 return np;
8876 }
8877 EXPORT_SYMBOL(of_find_node_by_phandle);
8878@@ -1146,18 +1227,18 @@ int of_add_property(struct device_node *np, struct property *prop)
8879 return rc;
8880
8881 prop->next = NULL;
8882- write_lock_irqsave(&devtree_lock, flags);
8883+ raw_spin_lock_irqsave(&devtree_lock, flags);
8884 next = &np->properties;
8885 while (*next) {
8886 if (strcmp(prop->name, (*next)->name) == 0) {
8887 /* duplicate ! don't insert it */
8888- write_unlock_irqrestore(&devtree_lock, flags);
8889+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8890 return -1;
8891 }
8892 next = &(*next)->next;
8893 }
8894 *next = prop;
8895- write_unlock_irqrestore(&devtree_lock, flags);
8896+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8897
8898 #ifdef CONFIG_PROC_DEVICETREE
8899 /* try to add to proc as well if it was initialized */
8900@@ -1187,7 +1268,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
8901 if (rc)
8902 return rc;
8903
8904- write_lock_irqsave(&devtree_lock, flags);
8905+ raw_spin_lock_irqsave(&devtree_lock, flags);
8906 next = &np->properties;
8907 while (*next) {
8908 if (*next == prop) {
8909@@ -1200,7 +1281,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
8910 }
8911 next = &(*next)->next;
8912 }
8913- write_unlock_irqrestore(&devtree_lock, flags);
8914+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8915
8916 if (!found)
8917 return -ENODEV;
8918@@ -1240,7 +1321,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
8919 if (!oldprop)
8920 return of_add_property(np, newprop);
8921
8922- write_lock_irqsave(&devtree_lock, flags);
8923+ raw_spin_lock_irqsave(&devtree_lock, flags);
8924 next = &np->properties;
8925 while (*next) {
8926 if (*next == oldprop) {
8927@@ -1254,7 +1335,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
8928 }
8929 next = &(*next)->next;
8930 }
8931- write_unlock_irqrestore(&devtree_lock, flags);
8932+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8933
8934 if (!found)
8935 return -ENODEV;
8936@@ -1327,12 +1408,12 @@ int of_attach_node(struct device_node *np)
8937 if (rc)
8938 return rc;
8939
8940- write_lock_irqsave(&devtree_lock, flags);
8941+ raw_spin_lock_irqsave(&devtree_lock, flags);
8942 np->sibling = np->parent->child;
8943 np->allnext = of_allnodes;
8944 np->parent->child = np;
8945 of_allnodes = np;
8946- write_unlock_irqrestore(&devtree_lock, flags);
8947+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8948
8949 of_add_proc_dt_entry(np);
8950 return 0;
8951@@ -1375,17 +1456,17 @@ int of_detach_node(struct device_node *np)
8952 if (rc)
8953 return rc;
8954
8955- write_lock_irqsave(&devtree_lock, flags);
8956+ raw_spin_lock_irqsave(&devtree_lock, flags);
8957
8958 if (of_node_check_flag(np, OF_DETACHED)) {
8959 /* someone already detached it */
8960- write_unlock_irqrestore(&devtree_lock, flags);
8961+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8962 return rc;
8963 }
8964
8965 parent = np->parent;
8966 if (!parent) {
8967- write_unlock_irqrestore(&devtree_lock, flags);
8968+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8969 return rc;
8970 }
8971
8972@@ -1412,7 +1493,7 @@ int of_detach_node(struct device_node *np)
8973 }
8974
8975 of_node_set_flag(np, OF_DETACHED);
8976- write_unlock_irqrestore(&devtree_lock, flags);
8977+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
8978
8979 of_remove_proc_dt_entry(np);
8980 return rc;
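The drivers/of hunks above all make the same change: devtree_lock goes from a reader/writer lock to a raw_spinlock_t taken with irqsave. On PREEMPT_RT an rwlock_t becomes a sleeping lock, while this lock is also taken from atomic contexts, so only a raw spinlock (which keeps spinning even on RT) is safe here. A minimal sketch of the resulting pattern follows; example_lock, example_list and example_find() are illustrative names, not part of the patch.

    #include <linux/list.h>
    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(example_lock);   /* stand-in for devtree_lock */
    static LIST_HEAD(example_list);

    struct example_node {
            struct list_head entry;
            int id;
    };

    static struct example_node *example_find(int id)
    {
            struct example_node *n, *found = NULL;
            unsigned long flags;

            /* raw_spin_lock_irqsave() never sleeps, even on PREEMPT_RT */
            raw_spin_lock_irqsave(&example_lock, flags);
            list_for_each_entry(n, &example_list, entry) {
                    if (n->id == id) {
                            found = n;
                            break;
                    }
            }
            raw_spin_unlock_irqrestore(&example_lock, flags);
            return found;
    }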
8981diff --git a/drivers/pci/access.c b/drivers/pci/access.c
8982index 32046c5..0941838 100644
8983--- a/drivers/pci/access.c
8984+++ b/drivers/pci/access.c
8985@@ -465,7 +465,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
8986 WARN_ON(!dev->block_cfg_access);
8987
8988 dev->block_cfg_access = 0;
8989- wake_up_all(&pci_cfg_wait);
8990+ wake_up_all_locked(&pci_cfg_wait);
8991 raw_spin_unlock_irqrestore(&pci_lock, flags);
8992 }
8993 EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
8994diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
8995index 666b7ac..61c1d2a 100644
8996--- a/drivers/scsi/fcoe/fcoe.c
8997+++ b/drivers/scsi/fcoe/fcoe.c
8998@@ -1272,7 +1272,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
8999 struct sk_buff *skb;
9000 #ifdef CONFIG_SMP
9001 struct fcoe_percpu_s *p0;
9002- unsigned targ_cpu = get_cpu();
9003+ unsigned targ_cpu = get_cpu_light();
9004 #endif /* CONFIG_SMP */
9005
9006 FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
9007@@ -1328,7 +1328,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu)
9008 kfree_skb(skb);
9009 spin_unlock_bh(&p->fcoe_rx_list.lock);
9010 }
9011- put_cpu();
9012+ put_cpu_light();
9013 #else
9014 /*
9015 * This a non-SMP scenario where the singular Rx thread is
9016@@ -1546,11 +1546,11 @@ err2:
9017 static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
9018 {
9019 struct fcoe_percpu_s *fps;
9020- int rc;
9021+ int rc, cpu = get_cpu_light();
9022
9023- fps = &get_cpu_var(fcoe_percpu);
9024+ fps = &per_cpu(fcoe_percpu, cpu);
9025 rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
9026- put_cpu_var(fcoe_percpu);
9027+ put_cpu_light();
9028
9029 return rc;
9030 }
9031@@ -1745,11 +1745,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
9032 return 0;
9033 }
9034
9035- stats = per_cpu_ptr(lport->stats, get_cpu());
9036+ stats = per_cpu_ptr(lport->stats, get_cpu_light());
9037 stats->InvalidCRCCount++;
9038 if (stats->InvalidCRCCount < 5)
9039 printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
9040- put_cpu();
9041+ put_cpu_light();
9042 return -EINVAL;
9043 }
9044
9045@@ -1825,13 +1825,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
9046 goto drop;
9047
9048 if (!fcoe_filter_frames(lport, fp)) {
9049- put_cpu();
9050+ put_cpu_light();
9051 fc_exch_recv(lport, fp);
9052 return;
9053 }
9054 drop:
9055 stats->ErrorFrames++;
9056- put_cpu();
9057+ put_cpu_light();
9058 kfree_skb(skb);
9059 }
9060
9061diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
9062index 4a909d7..69d36cc 100644
9063--- a/drivers/scsi/fcoe/fcoe_ctlr.c
9064+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
9065@@ -792,7 +792,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
9066
9067 INIT_LIST_HEAD(&del_list);
9068
9069- stats = per_cpu_ptr(fip->lp->stats, get_cpu());
9070+ stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
9071
9072 list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
9073 deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
9074@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
9075 sel_time = fcf->time;
9076 }
9077 }
9078- put_cpu();
9079+ put_cpu_light();
9080
9081 list_for_each_entry_safe(fcf, next, &del_list, list) {
9082 /* Removes fcf from current list */
9083diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
9084index c772d8d..67ca13a 100644
9085--- a/drivers/scsi/libfc/fc_exch.c
9086+++ b/drivers/scsi/libfc/fc_exch.c
9087@@ -730,10 +730,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
9088 }
9089 memset(ep, 0, sizeof(*ep));
9090
9091- cpu = get_cpu();
9092+ cpu = get_cpu_light();
9093 pool = per_cpu_ptr(mp->pool, cpu);
9094 spin_lock_bh(&pool->lock);
9095- put_cpu();
9096+ put_cpu_light();
9097
9098 /* peek cache of free slot */
9099 if (pool->left != FC_XID_UNKNOWN) {
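The fcoe, fcoe_ctlr and libfc hunks above swap get_cpu()/put_cpu() and get_cpu_var() for the RT tree's get_cpu_light()/put_cpu_light(). Those helpers (defined elsewhere in this patch) pin the task to its current CPU via migrate_disable() on RT instead of disabling preemption, so per-CPU statistics stay consistent while sleeping spinlocks such as pool->lock can still be taken. A minimal sketch with a made-up per-CPU counter; the counter and function names are illustrative only.

    #include <linux/percpu.h>
    #include <linux/smp.h>

    static DEFINE_PER_CPU(unsigned long, example_err_count);

    static void example_count_error(void)
    {
            /* get_cpu() on !RT; migrate_disable() + smp_processor_id() on RT */
            int cpu = get_cpu_light();

            per_cpu(example_err_count, cpu)++;
            put_cpu_light();
    }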
9100diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
9101index c0462c0..41192aa 100644
9102--- a/drivers/scsi/qla2xxx/qla_inline.h
9103+++ b/drivers/scsi/qla2xxx/qla_inline.h
9104@@ -36,12 +36,12 @@ qla2x00_poll(struct rsp_que *rsp)
9105 {
9106 unsigned long flags;
9107 struct qla_hw_data *ha = rsp->hw;
9108- local_irq_save(flags);
9109+ local_irq_save_nort(flags);
9110 if (IS_QLA82XX(ha))
9111 qla82xx_poll(0, rsp);
9112 else
9113 ha->isp_ops->intr_handler(0, rsp);
9114- local_irq_restore(flags);
9115+ local_irq_restore_nort(flags);
9116 }
9117
9118 static inline uint8_t *
9119diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
9120index b610f52..56c4166 100644
9121--- a/drivers/spi/spi-omap2-mcspi.c
9122+++ b/drivers/spi/spi-omap2-mcspi.c
9123@@ -285,8 +285,12 @@ static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
9124
9125 timeout = jiffies + msecs_to_jiffies(1000);
9126 while (!(__raw_readl(reg) & bit)) {
9127- if (time_after(jiffies, timeout))
9128- return -1;
9129+ if (time_after(jiffies, timeout)) {
9130+ if (!(__raw_readl(reg) & bit))
9131+ return -ETIMEDOUT;
9132+ else
9133+ return 0;
9134+ }
9135 cpu_relax();
9136 }
9137 return 0;
9138diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
9139index 584fa08..0cdea29 100644
9140--- a/drivers/tty/serial/8250/8250.c
9141+++ b/drivers/tty/serial/8250/8250.c
9142@@ -38,6 +38,7 @@
9143 #include <linux/nmi.h>
9144 #include <linux/mutex.h>
9145 #include <linux/slab.h>
9146+#include <linux/kdb.h>
9147 #ifdef CONFIG_SPARC
9148 #include <linux/sunserialcore.h>
9149 #endif
9150@@ -80,7 +81,16 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */
9151 #define DEBUG_INTR(fmt...) do { } while (0)
9152 #endif
9153
9154-#define PASS_LIMIT 512
9155+/*
9156+ * On -rt we can have more delays, and legitimately
9157+ * so - so don't drop work spuriously and spam the
9158+ * syslog:
9159+ */
9160+#ifdef CONFIG_PREEMPT_RT_FULL
9161+# define PASS_LIMIT 1000000
9162+#else
9163+# define PASS_LIMIT 512
9164+#endif
9165
9166 #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
9167
9168@@ -2900,14 +2910,10 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
9169
9170 touch_nmi_watchdog();
9171
9172- local_irq_save(flags);
9173- if (port->sysrq) {
9174- /* serial8250_handle_irq() already took the lock */
9175- locked = 0;
9176- } else if (oops_in_progress) {
9177- locked = spin_trylock(&port->lock);
9178- } else
9179- spin_lock(&port->lock);
9180+ if (port->sysrq || oops_in_progress || in_kdb_printk())
9181+ locked = spin_trylock_irqsave(&port->lock, flags);
9182+ else
9183+ spin_lock_irqsave(&port->lock, flags);
9184
9185 /*
9186 * First save the IER then disable the interrupts
9187@@ -2939,8 +2945,7 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
9188 serial8250_modem_status(up);
9189
9190 if (locked)
9191- spin_unlock(&port->lock);
9192- local_irq_restore(flags);
9193+ spin_unlock_irqrestore(&port->lock, flags);
9194 }
9195
9196 static int __init serial8250_console_setup(struct console *co, char *options)
9197diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
9198index 7fca402..d7294f7 100644
9199--- a/drivers/tty/serial/amba-pl011.c
9200+++ b/drivers/tty/serial/amba-pl011.c
9201@@ -1779,13 +1779,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
9202
9203 clk_enable(uap->clk);
9204
9205- local_irq_save(flags);
9206+ /*
9207+ * local_irq_save(flags);
9208+ *
9209+ * This local_irq_save() is nonsense. If we come in via sysrq
9210+ * handling then interrupts are already disabled. Aside of
9211+ * that the port.sysrq check is racy on SMP regardless.
9212+ */
9213 if (uap->port.sysrq)
9214 locked = 0;
9215 else if (oops_in_progress)
9216- locked = spin_trylock(&uap->port.lock);
9217+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
9218 else
9219- spin_lock(&uap->port.lock);
9220+ spin_lock_irqsave(&uap->port.lock, flags);
9221
9222 /*
9223 * First save the CR then disable the interrupts
9224@@ -1807,8 +1813,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
9225 writew(old_cr, uap->port.membase + UART011_CR);
9226
9227 if (locked)
9228- spin_unlock(&uap->port.lock);
9229- local_irq_restore(flags);
9230+ spin_unlock_irqrestore(&uap->port.lock, flags);
9231
9232 clk_disable(uap->clk);
9233 }
9234diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
9235index 57d6b29..f33fa96 100644
9236--- a/drivers/tty/serial/omap-serial.c
9237+++ b/drivers/tty/serial/omap-serial.c
9238@@ -1166,13 +1166,10 @@ serial_omap_console_write(struct console *co, const char *s,
9239
9240 pm_runtime_get_sync(up->dev);
9241
9242- local_irq_save(flags);
9243- if (up->port.sysrq)
9244- locked = 0;
9245- else if (oops_in_progress)
9246- locked = spin_trylock(&up->port.lock);
9247+ if (up->port.sysrq || oops_in_progress)
9248+ locked = spin_trylock_irqsave(&up->port.lock, flags);
9249 else
9250- spin_lock(&up->port.lock);
9251+ spin_lock_irqsave(&up->port.lock, flags);
9252
9253 /*
9254 * First save the IER then disable the interrupts
9255@@ -1201,8 +1198,7 @@ serial_omap_console_write(struct console *co, const char *s,
9256 pm_runtime_mark_last_busy(up->dev);
9257 pm_runtime_put_autosuspend(up->dev);
9258 if (locked)
9259- spin_unlock(&up->port.lock);
9260- local_irq_restore(flags);
9261+ spin_unlock_irqrestore(&up->port.lock, flags);
9262 }
9263
9264 static int __init
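The three serial console paths above (8250, amba-pl011, omap-serial) are converted the same way: the surrounding local_irq_save()/local_irq_restore() bracket is dropped and the interrupt state is carried by the port lock itself via spin_lock_irqsave(), with a trylock when sysrq, kdb or an oops may already hold the lock. Reduced to its locking skeleton (character output elided, names illustrative), the resulting shape is:

    #include <linux/kernel.h>
    #include <linux/serial_core.h>
    #include <linux/spinlock.h>

    static void example_console_write(struct uart_port *port,
                                      const char *s, unsigned int count)
    {
            unsigned long flags;
            int locked = 1;

            if (port->sysrq || oops_in_progress)
                    locked = spin_trylock_irqsave(&port->lock, flags);
            else
                    spin_lock_irqsave(&port->lock, flags);

            /* ... mask UART interrupts, emit 'count' characters of 's' ... */

            if (locked)
                    spin_unlock_irqrestore(&port->lock, flags);
    }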
9265diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
9266index cd1f861..7c021eb 100644
9267--- a/drivers/tty/tty_buffer.c
9268+++ b/drivers/tty/tty_buffer.c
9269@@ -566,10 +566,15 @@ void tty_flip_buffer_push(struct tty_struct *tty)
9270 buf->tail->commit = buf->tail->used;
9271 spin_unlock_irqrestore(&buf->lock, flags);
9272
9273+#ifndef CONFIG_PREEMPT_RT_FULL
9274 if (tty->low_latency)
9275 flush_to_ldisc(&buf->work);
9276 else
9277 schedule_work(&buf->work);
9278+#else
9279+ flush_to_ldisc(&buf->work);
9280+#endif
9281+
9282 }
9283 EXPORT_SYMBOL(tty_flip_buffer_push);
9284
9285diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
9286index 3bc244d..a62c4a4 100644
9287--- a/drivers/usb/chipidea/debug.c
9288+++ b/drivers/usb/chipidea/debug.c
9289@@ -222,7 +222,7 @@ static struct {
9290 } dbg_data = {
9291 .idx = 0,
9292 .tty = 0,
9293- .lck = __RW_LOCK_UNLOCKED(lck)
9294+ .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
9295 };
9296
9297 /**
9298diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
9299index 8e64adf..59c4d3c 100644
9300--- a/drivers/usb/core/hcd.c
9301+++ b/drivers/usb/core/hcd.c
9302@@ -2217,7 +2217,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd)
9303 * when the first handler doesn't use it. So let's just
9304 * assume it's never used.
9305 */
9306- local_irq_save(flags);
9307+ local_irq_save_nort(flags);
9308
9309 if (unlikely(HCD_DEAD(hcd) || !HCD_HW_ACCESSIBLE(hcd)))
9310 rc = IRQ_NONE;
9311@@ -2226,7 +2226,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd)
9312 else
9313 rc = IRQ_HANDLED;
9314
9315- local_irq_restore(flags);
9316+ local_irq_restore_nort(flags);
9317 return rc;
9318 }
9319 EXPORT_SYMBOL_GPL(usb_hcd_irq);
9320diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
9321index 180a2b0..1a3e81a 100644
9322--- a/drivers/usb/host/ohci-hcd.c
9323+++ b/drivers/usb/host/ohci-hcd.c
9324@@ -857,9 +857,13 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd)
9325 }
9326
9327 if (ints & OHCI_INTR_WDH) {
9328- spin_lock (&ohci->lock);
9329- dl_done_list (ohci);
9330- spin_unlock (&ohci->lock);
9331+ if (ohci->hcca->done_head == 0) {
9332+ ints &= ~OHCI_INTR_WDH;
9333+ } else {
9334+ spin_lock (&ohci->lock);
9335+ dl_done_list (ohci);
9336+ spin_unlock (&ohci->lock);
9337+ }
9338 }
9339
9340 if (quirk_zfmicro(ohci) && (ints & OHCI_INTR_SF)) {
9341diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
9342index b785e77..b0f12d7 100644
9343--- a/fs/autofs4/autofs_i.h
9344+++ b/fs/autofs4/autofs_i.h
9345@@ -34,6 +34,7 @@
9346 #include <linux/sched.h>
9347 #include <linux/mount.h>
9348 #include <linux/namei.h>
9349+#include <linux/delay.h>
9350 #include <asm/current.h>
9351 #include <asm/uaccess.h>
9352
9353diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
9354index 01443ce..5928f92 100644
9355--- a/fs/autofs4/expire.c
9356+++ b/fs/autofs4/expire.c
9357@@ -166,7 +166,7 @@ again:
9358 parent = p->d_parent;
9359 if (!spin_trylock(&parent->d_lock)) {
9360 spin_unlock(&p->d_lock);
9361- cpu_relax();
9362+ cpu_chill();
9363 goto relock;
9364 }
9365 spin_unlock(&p->d_lock);
9366diff --git a/fs/buffer.c b/fs/buffer.c
9367index 7a75c3e..8863f45 100644
9368--- a/fs/buffer.c
9369+++ b/fs/buffer.c
9370@@ -280,8 +280,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
9371 * decide that the page is now completely done.
9372 */
9373 first = page_buffers(page);
9374- local_irq_save(flags);
9375- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
9376+ flags = bh_uptodate_lock_irqsave(first);
9377 clear_buffer_async_read(bh);
9378 unlock_buffer(bh);
9379 tmp = bh;
9380@@ -294,8 +293,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
9381 }
9382 tmp = tmp->b_this_page;
9383 } while (tmp != bh);
9384- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9385- local_irq_restore(flags);
9386+ bh_uptodate_unlock_irqrestore(first, flags);
9387
9388 /*
9389 * If none of the buffers had errors and they are all
9390@@ -307,9 +305,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
9391 return;
9392
9393 still_busy:
9394- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9395- local_irq_restore(flags);
9396- return;
9397+ bh_uptodate_unlock_irqrestore(first, flags);
9398 }
9399
9400 /*
9401@@ -343,8 +339,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
9402 }
9403
9404 first = page_buffers(page);
9405- local_irq_save(flags);
9406- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
9407+ flags = bh_uptodate_lock_irqsave(first);
9408
9409 clear_buffer_async_write(bh);
9410 unlock_buffer(bh);
9411@@ -356,15 +351,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
9412 }
9413 tmp = tmp->b_this_page;
9414 }
9415- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9416- local_irq_restore(flags);
9417+ bh_uptodate_unlock_irqrestore(first, flags);
9418 end_page_writeback(page);
9419 return;
9420
9421 still_busy:
9422- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9423- local_irq_restore(flags);
9424- return;
9425+ bh_uptodate_unlock_irqrestore(first, flags);
9426 }
9427 EXPORT_SYMBOL(end_buffer_async_write);
9428
9429@@ -3256,6 +3248,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
9430 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
9431 if (ret) {
9432 INIT_LIST_HEAD(&ret->b_assoc_buffers);
9433+ buffer_head_init_locks(ret);
9434 preempt_disable();
9435 __this_cpu_inc(bh_accounting.nr);
9436 recalc_bh_state();
9437diff --git a/fs/dcache.c b/fs/dcache.c
9438index c3bbf85..5bf12b1 100644
9439--- a/fs/dcache.c
9440+++ b/fs/dcache.c
9441@@ -37,6 +37,7 @@
9442 #include <linux/rculist_bl.h>
9443 #include <linux/prefetch.h>
9444 #include <linux/ratelimit.h>
9445+#include <linux/delay.h>
9446 #include "internal.h"
9447 #include "mount.h"
9448
9449@@ -470,7 +471,7 @@ static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
9450 if (inode && !spin_trylock(&inode->i_lock)) {
9451 relock:
9452 spin_unlock(&dentry->d_lock);
9453- cpu_relax();
9454+ cpu_chill();
9455 return dentry; /* try again with same dentry */
9456 }
9457 if (IS_ROOT(dentry))
9458@@ -852,7 +853,7 @@ relock:
9459
9460 if (!spin_trylock(&dentry->d_lock)) {
9461 spin_unlock(&dcache_lru_lock);
9462- cpu_relax();
9463+ cpu_chill();
9464 goto relock;
9465 }
9466
9467@@ -2084,7 +2085,7 @@ again:
9468 if (dentry->d_count == 1) {
9469 if (!spin_trylock(&inode->i_lock)) {
9470 spin_unlock(&dentry->d_lock);
9471- cpu_relax();
9472+ cpu_chill();
9473 goto again;
9474 }
9475 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
9476diff --git a/fs/eventpoll.c b/fs/eventpoll.c
9477index 9fec183..343e14a 100644
9478--- a/fs/eventpoll.c
9479+++ b/fs/eventpoll.c
9480@@ -497,12 +497,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
9481 */
9482 static void ep_poll_safewake(wait_queue_head_t *wq)
9483 {
9484- int this_cpu = get_cpu();
9485+ int this_cpu = get_cpu_light();
9486
9487 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
9488 ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
9489
9490- put_cpu();
9491+ put_cpu_light();
9492 }
9493
9494 static void ep_remove_wait_queue(struct eppoll_entry *pwq)
9495diff --git a/fs/exec.c b/fs/exec.c
9496index 20df02c..db1c4ec 100644
9497--- a/fs/exec.c
9498+++ b/fs/exec.c
9499@@ -827,10 +827,12 @@ static int exec_mmap(struct mm_struct *mm)
9500 }
9501 }
9502 task_lock(tsk);
9503+ preempt_disable_rt();
9504 active_mm = tsk->active_mm;
9505 tsk->mm = mm;
9506 tsk->active_mm = mm;
9507 activate_mm(active_mm, mm);
9508+ preempt_enable_rt();
9509 task_unlock(tsk);
9510 arch_pick_mmap_layout(mm);
9511 if (old_mm) {
9512diff --git a/fs/file.c b/fs/file.c
9513index 2b3570b..328087b 100644
9514--- a/fs/file.c
9515+++ b/fs/file.c
9516@@ -98,14 +98,14 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
9517 kfree(fdt->open_fds);
9518 kfree(fdt);
9519 } else {
9520- fddef = &get_cpu_var(fdtable_defer_list);
9521+ fddef = &per_cpu(fdtable_defer_list, get_cpu_light());
9522 spin_lock(&fddef->lock);
9523 fdt->next = fddef->next;
9524 fddef->next = fdt;
9525 /* vmallocs are handled from the workqueue context */
9526 schedule_work(&fddef->wq);
9527 spin_unlock(&fddef->lock);
9528- put_cpu_var(fdtable_defer_list);
9529+ put_cpu_light();
9530 }
9531 }
9532
9533@@ -516,7 +516,7 @@ struct files_struct init_files = {
9534 .close_on_exec = init_files.close_on_exec_init,
9535 .open_fds = init_files.open_fds_init,
9536 },
9537- .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
9538+ .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
9539 };
9540
9541 /*
9542diff --git a/fs/fscache/page.c b/fs/fscache/page.c
9543index ff000e5..c84696c 100644
9544--- a/fs/fscache/page.c
9545+++ b/fs/fscache/page.c
9546@@ -796,11 +796,13 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
9547
9548 _enter("");
9549
9550- while (spin_lock(&cookie->stores_lock),
9551- n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
9552- ARRAY_SIZE(results),
9553- FSCACHE_COOKIE_PENDING_TAG),
9554- n > 0) {
9555+ spin_lock(&cookie->stores_lock);
9556+ while (1) {
9557+ n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
9558+ ARRAY_SIZE(results),
9559+ FSCACHE_COOKIE_PENDING_TAG);
9560+ if (n == 0)
9561+ break;
9562 for (i = n - 1; i >= 0; i--) {
9563 page = results[i];
9564 radix_tree_delete(&cookie->stores, page->index);
9565@@ -810,6 +812,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
9566
9567 for (i = n - 1; i >= 0; i--)
9568 page_cache_release(results[i]);
9569+ spin_lock(&cookie->stores_lock);
9570 }
9571
9572 spin_unlock(&cookie->stores_lock);
9573diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
9574index 08c0304..95debd7 100644
9575--- a/fs/jbd/checkpoint.c
9576+++ b/fs/jbd/checkpoint.c
9577@@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *journal)
9578 if (journal->j_flags & JFS_ABORT)
9579 return;
9580 spin_unlock(&journal->j_state_lock);
9581+ if (current->plug)
9582+ io_schedule();
9583 mutex_lock(&journal->j_checkpoint_mutex);
9584
9585 /*
9586diff --git a/fs/namespace.c b/fs/namespace.c
9587index 5dd7709..859a026 100644
9588--- a/fs/namespace.c
9589+++ b/fs/namespace.c
9590@@ -22,6 +22,7 @@
9591 #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
9592 #include <linux/uaccess.h>
9593 #include <linux/proc_fs.h>
9594+#include <linux/delay.h>
9595 #include "pnode.h"
9596 #include "internal.h"
9597
9598@@ -313,8 +314,11 @@ int __mnt_want_write(struct vfsmount *m)
9599 * incremented count after it has set MNT_WRITE_HOLD.
9600 */
9601 smp_mb();
9602- while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
9603- cpu_relax();
9604+ while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
9605+ preempt_enable();
9606+ cpu_chill();
9607+ preempt_disable();
9608+ }
9609 /*
9610 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
9611 * be set to match its requirements. So we must not load that until
9612diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
9613index fa9c05f..f5d4565 100644
9614--- a/fs/ntfs/aops.c
9615+++ b/fs/ntfs/aops.c
9616@@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
9617 "0x%llx.", (unsigned long long)bh->b_blocknr);
9618 }
9619 first = page_buffers(page);
9620- local_irq_save(flags);
9621- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
9622+ flags = bh_uptodate_lock_irqsave(first);
9623 clear_buffer_async_read(bh);
9624 unlock_buffer(bh);
9625 tmp = bh;
9626@@ -124,8 +123,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
9627 }
9628 tmp = tmp->b_this_page;
9629 } while (tmp != bh);
9630- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9631- local_irq_restore(flags);
9632+ bh_uptodate_unlock_irqrestore(first, flags);
9633 /*
9634 * If none of the buffers had errors then we can set the page uptodate,
9635 * but we first have to perform the post read mst fixups, if the
9636@@ -146,13 +144,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
9637 recs = PAGE_CACHE_SIZE / rec_size;
9638 /* Should have been verified before we got here... */
9639 BUG_ON(!recs);
9640- local_irq_save(flags);
9641+ local_irq_save_nort(flags);
9642 kaddr = kmap_atomic(page);
9643 for (i = 0; i < recs; i++)
9644 post_read_mst_fixup((NTFS_RECORD*)(kaddr +
9645 i * rec_size), rec_size);
9646 kunmap_atomic(kaddr);
9647- local_irq_restore(flags);
9648+ local_irq_restore_nort(flags);
9649 flush_dcache_page(page);
9650 if (likely(page_uptodate && !PageError(page)))
9651 SetPageUptodate(page);
9652@@ -160,9 +158,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
9653 unlock_page(page);
9654 return;
9655 still_busy:
9656- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
9657- local_irq_restore(flags);
9658- return;
9659+ bh_uptodate_unlock_irqrestore(first, flags);
9660 }
9661
9662 /**
9663diff --git a/fs/timerfd.c b/fs/timerfd.c
9664index d03822b..522aeb8 100644
9665--- a/fs/timerfd.c
9666+++ b/fs/timerfd.c
9667@@ -311,7 +311,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
9668 if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
9669 break;
9670 spin_unlock_irq(&ctx->wqh.lock);
9671- cpu_relax();
9672+ hrtimer_wait_for_timer(&ctx->tmr);
9673 }
9674
9675 /*
9676diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
9677index 7d10f96..aee7fd2 100644
9678--- a/include/asm-generic/bug.h
9679+++ b/include/asm-generic/bug.h
9680@@ -202,6 +202,20 @@ extern void warn_slowpath_null(const char *file, const int line);
9681 # define WARN_ON_SMP(x) ({0;})
9682 #endif
9683
9684+#ifdef CONFIG_PREEMPT_RT_BASE
9685+# define BUG_ON_RT(c) BUG_ON(c)
9686+# define BUG_ON_NONRT(c) do { } while (0)
9687+# define WARN_ON_RT(condition) WARN_ON(condition)
9688+# define WARN_ON_NONRT(condition) do { } while (0)
9689+# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
9690+#else
9691+# define BUG_ON_RT(c) do { } while (0)
9692+# define BUG_ON_NONRT(c) BUG_ON(c)
9693+# define WARN_ON_RT(condition) do { } while (0)
9694+# define WARN_ON_NONRT(condition) WARN_ON(condition)
9695+# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
9696+#endif
9697+
9698 #endif /* __ASSEMBLY__ */
9699
9700 #endif
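The _RT/_NONRT assertion variants added above let a check that only holds for one kernel flavour compile away on the other. For instance, a path that must run with hard interrupts disabled on a stock kernel but is legitimately preemptible on RT could keep its sanity check as in this small sketch (the function name is hypothetical):

    #include <linux/bug.h>
    #include <linux/irqflags.h>

    static void example_assert_context(void)
    {
            /* WARN_ON_ONCE() on !RT kernels, compiles to nothing on RT */
            WARN_ON_ONCE_NONRT(!irqs_disabled());
    }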
9701diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
9702index 2533fdd..d8d4c89 100644
9703--- a/include/asm-generic/cmpxchg-local.h
9704+++ b/include/asm-generic/cmpxchg-local.h
9705@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
9706 if (size == 8 && sizeof(unsigned long) != 8)
9707 wrong_size_cmpxchg(ptr);
9708
9709- local_irq_save(flags);
9710+ raw_local_irq_save(flags);
9711 switch (size) {
9712 case 1: prev = *(u8 *)ptr;
9713 if (prev == old)
9714@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
9715 default:
9716 wrong_size_cmpxchg(ptr);
9717 }
9718- local_irq_restore(flags);
9719+ raw_local_irq_restore(flags);
9720 return prev;
9721 }
9722
9723@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
9724 u64 prev;
9725 unsigned long flags;
9726
9727- local_irq_save(flags);
9728+ raw_local_irq_save(flags);
9729 prev = *(u64 *)ptr;
9730 if (prev == old)
9731 *(u64 *)ptr = new;
9732- local_irq_restore(flags);
9733+ raw_local_irq_restore(flags);
9734 return prev;
9735 }
9736
9737diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
9738index 458f497..3f8e27b 100644
9739--- a/include/linux/buffer_head.h
9740+++ b/include/linux/buffer_head.h
9741@@ -72,8 +72,52 @@ struct buffer_head {
9742 struct address_space *b_assoc_map; /* mapping this buffer is
9743 associated with */
9744 atomic_t b_count; /* users using this buffer_head */
9745+#ifdef CONFIG_PREEMPT_RT_BASE
9746+ spinlock_t b_uptodate_lock;
9747+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
9748+ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
9749+ spinlock_t b_state_lock;
9750+ spinlock_t b_journal_head_lock;
9751+#endif
9752+#endif
9753 };
9754
9755+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
9756+{
9757+ unsigned long flags;
9758+
9759+#ifndef CONFIG_PREEMPT_RT_BASE
9760+ local_irq_save(flags);
9761+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
9762+#else
9763+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
9764+#endif
9765+ return flags;
9766+}
9767+
9768+static inline void
9769+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
9770+{
9771+#ifndef CONFIG_PREEMPT_RT_BASE
9772+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
9773+ local_irq_restore(flags);
9774+#else
9775+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
9776+#endif
9777+}
9778+
9779+static inline void buffer_head_init_locks(struct buffer_head *bh)
9780+{
9781+#ifdef CONFIG_PREEMPT_RT_BASE
9782+ spin_lock_init(&bh->b_uptodate_lock);
9783+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
9784+ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
9785+ spin_lock_init(&bh->b_state_lock);
9786+ spin_lock_init(&bh->b_journal_head_lock);
9787+#endif
9788+#endif
9789+}
9790+
9791 /*
9792 * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
9793 * and buffer_foo() functions.
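The fs/buffer.c and fs/ntfs/aops.c hunks earlier in the patch are rewritten in terms of these two helpers: the !RT build keeps the old bit_spin_lock-under-local_irq_save scheme, while the RT build uses the real per-buffer b_uptodate_lock initialised by buffer_head_init_locks(). The calling convention is simply lock, update, unlock, as in this sketch of an end_io-style caller (the helper name is illustrative):

    #include <linux/buffer_head.h>

    static void example_clear_async_read(struct buffer_head *first,
                                         struct buffer_head *bh)
    {
            unsigned long flags;

            flags = bh_uptodate_lock_irqsave(first);
            clear_buffer_async_read(bh);
            /* ... walk the other buffers on the page if needed ... */
            bh_uptodate_unlock_irqrestore(first, flags);
    }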
9794diff --git a/include/linux/completion.h b/include/linux/completion.h
9795index 51494e6..ebb6565 100644
9796--- a/include/linux/completion.h
9797+++ b/include/linux/completion.h
9798@@ -8,7 +8,7 @@
9799 * See kernel/sched.c for details.
9800 */
9801
9802-#include <linux/wait.h>
9803+#include <linux/wait-simple.h>
9804
9805 /*
9806 * struct completion - structure used to maintain state for a "completion"
9807@@ -24,11 +24,11 @@
9808 */
9809 struct completion {
9810 unsigned int done;
9811- wait_queue_head_t wait;
9812+ struct swait_head wait;
9813 };
9814
9815 #define COMPLETION_INITIALIZER(work) \
9816- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
9817+ { 0, SWAIT_HEAD_INITIALIZER((work).wait) }
9818
9819 #define COMPLETION_INITIALIZER_ONSTACK(work) \
9820 ({ init_completion(&work); work; })
9821@@ -73,7 +73,7 @@ struct completion {
9822 static inline void init_completion(struct completion *x)
9823 {
9824 x->done = 0;
9825- init_waitqueue_head(&x->wait);
9826+ init_swait_head(&x->wait);
9827 }
9828
9829 extern void wait_for_completion(struct completion *);
9830diff --git a/include/linux/console.h b/include/linux/console.h
9831index 47b858c..4a6948a 100644
9832--- a/include/linux/console.h
9833+++ b/include/linux/console.h
9834@@ -141,6 +141,7 @@ struct console {
9835 for (con = console_drivers; con != NULL; con = con->next)
9836
9837 extern int console_set_on_cmdline;
9838+extern struct console *early_console;
9839
9840 extern int add_preferred_console(char *name, int idx, char *options);
9841 extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
9842diff --git a/include/linux/cpu.h b/include/linux/cpu.h
9843index ce7a074..7781c9e 100644
9844--- a/include/linux/cpu.h
9845+++ b/include/linux/cpu.h
9846@@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys;
9847
9848 extern void get_online_cpus(void);
9849 extern void put_online_cpus(void);
9850+extern void pin_current_cpu(void);
9851+extern void unpin_current_cpu(void);
9852 #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
9853 #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
9854 #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
9855@@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_unlock(void)
9856
9857 #define get_online_cpus() do { } while (0)
9858 #define put_online_cpus() do { } while (0)
9859+static inline void pin_current_cpu(void) { }
9860+static inline void unpin_current_cpu(void) { }
9861 #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
9862 /* These aren't inline functions due to a GCC bug. */
9863 #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
9864diff --git a/include/linux/delay.h b/include/linux/delay.h
9865index a6ecb34..e23a7c0 100644
9866--- a/include/linux/delay.h
9867+++ b/include/linux/delay.h
9868@@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds)
9869 msleep(seconds * 1000);
9870 }
9871
9872+#ifdef CONFIG_PREEMPT_RT_FULL
9873+# define cpu_chill() msleep(1)
9874+#else
9875+# define cpu_chill() cpu_relax()
9876+#endif
9877+
9878 #endif /* defined(_LINUX_DELAY_H) */
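cpu_chill() is the RT replacement for cpu_relax() in trylock-retry loops such as the dcache, autofs4 and namespace hunks earlier in this patch: on RT, busy-spinning can keep a preempted lock holder from ever running, so the retrying task sleeps for a millisecond instead. The loop shape it is meant for, shown with an invented two-lock structure:

    #include <linux/delay.h>
    #include <linux/spinlock.h>

    struct example_pair {
            spinlock_t a;
            spinlock_t b;
    };

    /* take both locks in the fixed order a then b, backing off on contention */
    static void example_lock_both(struct example_pair *p)
    {
            for (;;) {
                    spin_lock(&p->a);
                    if (spin_trylock(&p->b))
                            return;         /* both locks held */
                    spin_unlock(&p->a);
                    cpu_chill();            /* msleep(1) on RT, cpu_relax() otherwise */
            }
    }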
9879diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
9880index d3201e4..96d3e4a 100644
9881--- a/include/linux/dmaengine.h
9882+++ b/include/linux/dmaengine.h
9883@@ -608,7 +608,10 @@ static inline int dmaengine_device_control(struct dma_chan *chan,
9884 enum dma_ctrl_cmd cmd,
9885 unsigned long arg)
9886 {
9887- return chan->device->device_control(chan, cmd, arg);
9888+ if (chan->device->device_control)
9889+ return chan->device->device_control(chan, cmd, arg);
9890+
9891+ return -ENOSYS;
9892 }
9893
9894 static inline int dmaengine_slave_config(struct dma_chan *chan,
9895@@ -618,6 +621,11 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
9896 (unsigned long)config);
9897 }
9898
9899+static inline bool is_slave_direction(enum dma_transfer_direction direction)
9900+{
9901+ return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);
9902+}
9903+
9904 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
9905 struct dma_chan *chan, dma_addr_t buf, size_t len,
9906 enum dma_transfer_direction dir, unsigned long flags)
9907@@ -660,6 +668,13 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
9908 period_len, dir, flags, NULL);
9909 }
9910
9911+static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
9912+ struct dma_chan *chan, struct dma_interleaved_template *xt,
9913+ unsigned long flags)
9914+{
9915+ return chan->device->device_prep_interleaved_dma(chan, xt, flags);
9916+}
9917+
9918 static inline int dmaengine_terminate_all(struct dma_chan *chan)
9919 {
9920 return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
9921@@ -849,20 +864,6 @@ static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx)
9922 return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK;
9923 }
9924
9925-#define first_dma_cap(mask) __first_dma_cap(&(mask))
9926-static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
9927-{
9928- return min_t(int, DMA_TX_TYPE_END,
9929- find_first_bit(srcp->bits, DMA_TX_TYPE_END));
9930-}
9931-
9932-#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
9933-static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
9934-{
9935- return min_t(int, DMA_TX_TYPE_END,
9936- find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
9937-}
9938-
9939 #define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
9940 static inline void
9941 __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
9942@@ -891,9 +892,7 @@ __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
9943 }
9944
9945 #define for_each_dma_cap_mask(cap, mask) \
9946- for ((cap) = first_dma_cap(mask); \
9947- (cap) < DMA_TX_TYPE_END; \
9948- (cap) = next_dma_cap((cap), (mask)))
9949+ for_each_set_bit(cap, mask.bits, DMA_TX_TYPE_END)
9950
9951 /**
9952 * dma_async_issue_pending - flush pending transactions to HW
9953@@ -907,8 +906,6 @@ static inline void dma_async_issue_pending(struct dma_chan *chan)
9954 chan->device->device_issue_pending(chan);
9955 }
9956
9957-#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
9958-
9959 /**
9960 * dma_async_is_tx_complete - poll for transaction completion
9961 * @chan: DMA channel
9962@@ -934,16 +931,13 @@ static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
9963 return status;
9964 }
9965
9966-#define dma_async_memcpy_complete(chan, cookie, last, used)\
9967- dma_async_is_tx_complete(chan, cookie, last, used)
9968-
9969 /**
9970 * dma_async_is_complete - test a cookie against chan state
9971 * @cookie: transaction identifier to test status of
9972 * @last_complete: last know completed transaction
9973 * @last_used: last cookie value handed out
9974 *
9975- * dma_async_is_complete() is used in dma_async_memcpy_complete()
9976+ * dma_async_is_complete() is used in dma_async_is_tx_complete()
9977 * the test logic is separated for lightweight testing of multiple cookies
9978 */
9979 static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
9980@@ -973,7 +967,9 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
9981 #ifdef CONFIG_DMA_ENGINE
9982 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
9983 void dma_issue_pending_all(void);
9984-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
9985+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
9986+ dma_filter_fn fn, void *fn_param);
9987+struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
9988 void dma_release_channel(struct dma_chan *chan);
9989 #else
9990 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
9991@@ -983,11 +979,16 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
9992 static inline void dma_issue_pending_all(void)
9993 {
9994 }
9995-static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
9996+static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
9997 dma_filter_fn fn, void *fn_param)
9998 {
9999 return NULL;
10000 }
10001+static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
10002+ const char *name)
10003+{
10004+ return NULL;
10005+}
10006 static inline void dma_release_channel(struct dma_chan *chan)
10007 {
10008 }
10009@@ -1001,6 +1002,22 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
10010 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
10011 struct dma_chan *net_dma_find_channel(void);
10012 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
10013+#define dma_request_slave_channel_compat(mask, x, y, dev, name) \
10014+ __dma_request_slave_channel_compat(&(mask), x, y, dev, name)
10015+
10016+static inline struct dma_chan
10017+*__dma_request_slave_channel_compat(const dma_cap_mask_t *mask,
10018+ dma_filter_fn fn, void *fn_param,
10019+ struct device *dev, char *name)
10020+{
10021+ struct dma_chan *chan;
10022+
10023+ chan = dma_request_slave_channel(dev, name);
10024+ if (chan)
10025+ return chan;
10026+
10027+ return __dma_request_channel(mask, fn, fn_param);
10028+}
10029
10030 /* --- Helper iov-locking functions --- */
10031
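Besides the RT changes, this part of the patch pulls in newer dmaengine API additions: dma_request_slave_channel(), the dma_request_slave_channel_compat() wrapper, is_slave_direction() and dmaengine_prep_interleaved_dma(). A typical client use of the compat helper might look like the sketch below; the filter function, its argument and the "rx" channel name are placeholders, not something this patch defines.

    #include <linux/dmaengine.h>

    static struct dma_chan *example_get_rx_chan(struct device *dev,
                                                dma_filter_fn filter,
                                                void *filter_arg)
    {
            dma_cap_mask_t mask;

            dma_cap_zero(mask);
            dma_cap_set(DMA_SLAVE, mask);

            /* try the firmware-described "rx" channel first, then fall back
             * to the legacy filter-based lookup */
            return dma_request_slave_channel_compat(mask, filter, filter_arg,
                                                    dev, "rx");
    }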
10032diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
10033index a3d4895..16ad63d 100644
10034--- a/include/linux/ftrace_event.h
10035+++ b/include/linux/ftrace_event.h
10036@@ -49,7 +49,9 @@ struct trace_entry {
10037 unsigned char flags;
10038 unsigned char preempt_count;
10039 int pid;
10040- int padding;
10041+ unsigned short migrate_disable;
10042+ unsigned short padding;
10043+ unsigned char preempt_lazy_count;
10044 };
10045
10046 #define FTRACE_MAX_EVENT \
10047diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
10048index 624ef3f..dfa97de 100644
10049--- a/include/linux/hardirq.h
10050+++ b/include/linux/hardirq.h
10051@@ -61,7 +61,11 @@
10052 #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
10053 #define NMI_OFFSET (1UL << NMI_SHIFT)
10054
10055-#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
10056+#ifndef CONFIG_PREEMPT_RT_FULL
10057+# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
10058+#else
10059+# define SOFTIRQ_DISABLE_OFFSET (0)
10060+#endif
10061
10062 #ifndef PREEMPT_ACTIVE
10063 #define PREEMPT_ACTIVE_BITS 1
10064@@ -74,10 +78,17 @@
10065 #endif
10066
10067 #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
10068-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
10069 #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
10070 | NMI_MASK))
10071
10072+#ifndef CONFIG_PREEMPT_RT_FULL
10073+# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
10074+# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
10075+#else
10076+# define softirq_count() (0UL)
10077+extern int in_serving_softirq(void);
10078+#endif
10079+
10080 /*
10081 * Are we doing bottom half or hardware interrupt processing?
10082 * Are we in a softirq context? Interrupt context?
10083@@ -87,7 +98,6 @@
10084 #define in_irq() (hardirq_count())
10085 #define in_softirq() (softirq_count())
10086 #define in_interrupt() (irq_count())
10087-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
10088
10089 /*
10090 * Are we in NMI context?
10091diff --git a/include/linux/highmem.h b/include/linux/highmem.h
10092index ef788b5..84223de 100644
10093--- a/include/linux/highmem.h
10094+++ b/include/linux/highmem.h
10095@@ -7,6 +7,7 @@
10096 #include <linux/mm.h>
10097 #include <linux/uaccess.h>
10098 #include <linux/hardirq.h>
10099+#include <linux/sched.h>
10100
10101 #include <asm/cacheflush.h>
10102
10103@@ -85,32 +86,51 @@ static inline void __kunmap_atomic(void *addr)
10104
10105 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
10106
10107+#ifndef CONFIG_PREEMPT_RT_FULL
10108 DECLARE_PER_CPU(int, __kmap_atomic_idx);
10109+#endif
10110
10111 static inline int kmap_atomic_idx_push(void)
10112 {
10113+#ifndef CONFIG_PREEMPT_RT_FULL
10114 int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
10115
10116-#ifdef CONFIG_DEBUG_HIGHMEM
10117+# ifdef CONFIG_DEBUG_HIGHMEM
10118 WARN_ON_ONCE(in_irq() && !irqs_disabled());
10119 BUG_ON(idx > KM_TYPE_NR);
10120-#endif
10121+# endif
10122 return idx;
10123+#else
10124+ current->kmap_idx++;
10125+ BUG_ON(current->kmap_idx > KM_TYPE_NR);
10126+ return current->kmap_idx - 1;
10127+#endif
10128 }
10129
10130 static inline int kmap_atomic_idx(void)
10131 {
10132+#ifndef CONFIG_PREEMPT_RT_FULL
10133 return __this_cpu_read(__kmap_atomic_idx) - 1;
10134+#else
10135+ return current->kmap_idx - 1;
10136+#endif
10137 }
10138
10139 static inline void kmap_atomic_idx_pop(void)
10140 {
10141-#ifdef CONFIG_DEBUG_HIGHMEM
10142+#ifndef CONFIG_PREEMPT_RT_FULL
10143+# ifdef CONFIG_DEBUG_HIGHMEM
10144 int idx = __this_cpu_dec_return(__kmap_atomic_idx);
10145
10146 BUG_ON(idx < 0);
10147-#else
10148+# else
10149 __this_cpu_dec(__kmap_atomic_idx);
10150+# endif
10151+#else
10152+ current->kmap_idx--;
10153+# ifdef CONFIG_DEBUG_HIGHMEM
10154+ BUG_ON(current->kmap_idx < 0);
10155+# endif
10156 #endif
10157 }
10158
10159diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
10160index cc07d27..113bcf1 100644
10161--- a/include/linux/hrtimer.h
10162+++ b/include/linux/hrtimer.h
10163@@ -111,6 +111,11 @@ struct hrtimer {
10164 enum hrtimer_restart (*function)(struct hrtimer *);
10165 struct hrtimer_clock_base *base;
10166 unsigned long state;
10167+ struct list_head cb_entry;
10168+ int irqsafe;
10169+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
10170+ ktime_t praecox;
10171+#endif
10172 #ifdef CONFIG_TIMER_STATS
10173 int start_pid;
10174 void *start_site;
10175@@ -147,6 +152,7 @@ struct hrtimer_clock_base {
10176 int index;
10177 clockid_t clockid;
10178 struct timerqueue_head active;
10179+ struct list_head expired;
10180 ktime_t resolution;
10181 ktime_t (*get_time)(void);
10182 ktime_t softirq_time;
10183@@ -189,6 +195,9 @@ struct hrtimer_cpu_base {
10184 unsigned long nr_hangs;
10185 ktime_t max_hang_time;
10186 #endif
10187+#ifdef CONFIG_PREEMPT_RT_BASE
10188+ wait_queue_head_t wait;
10189+#endif
10190 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
10191 };
10192
10193@@ -382,6 +391,13 @@ static inline int hrtimer_restart(struct hrtimer *timer)
10194 return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
10195 }
10196
10197+/* Softirq preemption could deadlock timer removal */
10198+#ifdef CONFIG_PREEMPT_RT_BASE
10199+ extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
10200+#else
10201+# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
10202+#endif
10203+
10204 /* Query timers: */
10205 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
10206 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
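hrtimer_wait_for_timer() pairs with the fs/timerfd.c hunk earlier in the patch: because preempting a softirq-run timer callback could otherwise deadlock its removal (as the comment above notes), a task that loses the cancel race sleeps until the callback has finished instead of spinning. The canonical retry loop looks like this sketch; the ctx structure is illustrative.

    #include <linux/hrtimer.h>
    #include <linux/spinlock.h>

    struct example_ctx {
            spinlock_t lock;
            struct hrtimer tmr;
    };

    static void example_cancel_sync(struct example_ctx *ctx)
    {
            for (;;) {
                    spin_lock_irq(&ctx->lock);
                    if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
                            break;
                    spin_unlock_irq(&ctx->lock);
                    /* sleeps until the callback is done on RT; cpu_relax() on !RT */
                    hrtimer_wait_for_timer(&ctx->tmr);
            }
            spin_unlock_irq(&ctx->lock);
    }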
10207diff --git a/include/linux/idr.h b/include/linux/idr.h
10208index de7e190..e5eb125 100644
10209--- a/include/linux/idr.h
10210+++ b/include/linux/idr.h
10211@@ -136,7 +136,7 @@ struct ida {
10212 struct ida_bitmap *free_bitmap;
10213 };
10214
10215-#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, }
10216+#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
10217 #define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
10218
10219 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
10220diff --git a/include/linux/init_task.h b/include/linux/init_task.h
10221index 6d087c5..10f32ab 100644
10222--- a/include/linux/init_task.h
10223+++ b/include/linux/init_task.h
10224@@ -141,6 +141,12 @@ extern struct task_group root_task_group;
10225 # define INIT_PERF_EVENTS(tsk)
10226 #endif
10227
10228+#ifdef CONFIG_PREEMPT_RT_BASE
10229+# define INIT_TIMER_LIST .posix_timer_list = NULL,
10230+#else
10231+# define INIT_TIMER_LIST
10232+#endif
10233+
10234 #define INIT_TASK_COMM "swapper"
10235
10236 /*
10237@@ -196,6 +202,7 @@ extern struct task_group root_task_group;
10238 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
10239 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
10240 .timer_slack_ns = 50000, /* 50 usec default slack */ \
10241+ INIT_TIMER_LIST \
10242 .pids = { \
10243 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
10244 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
10245diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
10246index 5fa5afe..11bdb1e 100644
10247--- a/include/linux/interrupt.h
10248+++ b/include/linux/interrupt.h
10249@@ -58,6 +58,7 @@
10250 * IRQF_NO_THREAD - Interrupt cannot be threaded
10251 * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
10252 * resume time.
10253+ * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
10254 */
10255 #define IRQF_DISABLED 0x00000020
10256 #define IRQF_SHARED 0x00000080
10257@@ -71,6 +72,7 @@
10258 #define IRQF_FORCE_RESUME 0x00008000
10259 #define IRQF_NO_THREAD 0x00010000
10260 #define IRQF_EARLY_RESUME 0x00020000
10261+#define IRQF_NO_SOFTIRQ_CALL 0x00040000
10262
10263 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
10264
10265@@ -211,7 +213,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
10266 #ifdef CONFIG_LOCKDEP
10267 # define local_irq_enable_in_hardirq() do { } while (0)
10268 #else
10269-# define local_irq_enable_in_hardirq() local_irq_enable()
10270+# define local_irq_enable_in_hardirq() local_irq_enable_nort()
10271 #endif
10272
10273 extern void disable_irq_nosync(unsigned int irq);
10274@@ -383,9 +385,13 @@ static inline int disable_irq_wake(unsigned int irq)
10275
10276
10277 #ifdef CONFIG_IRQ_FORCED_THREADING
10278-extern bool force_irqthreads;
10279+# ifndef CONFIG_PREEMPT_RT_BASE
10280+ extern bool force_irqthreads;
10281+# else
10282+# define force_irqthreads (true)
10283+# endif
10284 #else
10285-#define force_irqthreads (0)
10286+#define force_irqthreads (false)
10287 #endif
10288
10289 #ifndef __ARCH_SET_SOFTIRQ_PENDING
10290@@ -441,8 +447,14 @@ struct softirq_action
10291 void (*action)(struct softirq_action *);
10292 };
10293
10294+#ifndef CONFIG_PREEMPT_RT_FULL
10295 asmlinkage void do_softirq(void);
10296 asmlinkage void __do_softirq(void);
10297+static inline void thread_do_softirq(void) { do_softirq(); }
10298+#else
10299+extern void thread_do_softirq(void);
10300+#endif
10301+
10302 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
10303 extern void softirq_init(void);
10304 extern void __raise_softirq_irqoff(unsigned int nr);
10305@@ -450,6 +462,8 @@ extern void __raise_softirq_irqoff(unsigned int nr);
10306 extern void raise_softirq_irqoff(unsigned int nr);
10307 extern void raise_softirq(unsigned int nr);
10308
10309+extern void softirq_check_pending_idle(void);
10310+
10311 /* This is the worklist that queues up per-cpu softirq work.
10312 *
10313 * send_remote_sendirq() adds work to these lists, and
10314@@ -490,8 +504,9 @@ extern void __send_remote_softirq(struct call_single_data *cp, int cpu,
10315 to be executed on some cpu at least once after this.
10316 * If the tasklet is already scheduled, but its execution is still not
10317 started, it will be executed only once.
10318- * If this tasklet is already running on another CPU (or schedule is called
10319- from tasklet itself), it is rescheduled for later.
10320+ * If this tasklet is already running on another CPU, it is rescheduled
10321+ for later.
10322+ * Schedule must not be called from the tasklet itself (a lockup occurs)
10323 * Tasklet is strictly serialized wrt itself, but not
10324 wrt another tasklets. If client needs some intertask synchronization,
10325 he makes it with spinlocks.
10326@@ -516,27 +531,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
10327 enum
10328 {
10329 TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
10330- TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
10331+ TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
10332+ TASKLET_STATE_PENDING /* Tasklet is pending */
10333 };
10334
10335-#ifdef CONFIG_SMP
10336+#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
10337+#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
10338+#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
10339+
10340+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
10341 static inline int tasklet_trylock(struct tasklet_struct *t)
10342 {
10343 return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
10344 }
10345
10346+static inline int tasklet_tryunlock(struct tasklet_struct *t)
10347+{
10348+ return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
10349+}
10350+
10351 static inline void tasklet_unlock(struct tasklet_struct *t)
10352 {
10353 smp_mb__before_clear_bit();
10354 clear_bit(TASKLET_STATE_RUN, &(t)->state);
10355 }
10356
10357-static inline void tasklet_unlock_wait(struct tasklet_struct *t)
10358-{
10359- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
10360-}
10361+extern void tasklet_unlock_wait(struct tasklet_struct *t);
10362+
10363 #else
10364 #define tasklet_trylock(t) 1
10365+#define tasklet_tryunlock(t) 1
10366 #define tasklet_unlock_wait(t) do { } while (0)
10367 #define tasklet_unlock(t) do { } while (0)
10368 #endif
10369@@ -585,17 +609,8 @@ static inline void tasklet_disable(struct tasklet_struct *t)
10370 smp_mb();
10371 }
10372
10373-static inline void tasklet_enable(struct tasklet_struct *t)
10374-{
10375- smp_mb__before_atomic_dec();
10376- atomic_dec(&t->count);
10377-}
10378-
10379-static inline void tasklet_hi_enable(struct tasklet_struct *t)
10380-{
10381- smp_mb__before_atomic_dec();
10382- atomic_dec(&t->count);
10383-}
10384+extern void tasklet_enable(struct tasklet_struct *t);
10385+extern void tasklet_hi_enable(struct tasklet_struct *t);
10386
10387 extern void tasklet_kill(struct tasklet_struct *t);
10388 extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
10389@@ -627,6 +642,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
10390 tasklet_kill(&ttimer->tasklet);
10391 }
10392
10393+#ifdef CONFIG_PREEMPT_RT_FULL
10394+extern void softirq_early_init(void);
10395+#else
10396+static inline void softirq_early_init(void) { }
10397+#endif
10398+
10399 /*
10400 * Autoprobing for irqs:
10401 *
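IRQF_NO_SOFTIRQ_CALL above is a request flag, so a driver whose threaded handler must run without the usual softirq processing on RT would pass it at request time. A hypothetical setup with placeholder handlers and device data (none of these names come from the patch):

    #include <linux/interrupt.h>

    static irqreturn_t example_hardirq(int irq, void *data)
    {
            return IRQ_WAKE_THREAD;
    }

    static irqreturn_t example_thread_fn(int irq, void *data)
    {
            /* with IRQF_NO_SOFTIRQ_CALL the irq thread does not process
             * pending softirqs around this handler on RT */
            return IRQ_HANDLED;
    }

    static int example_setup_irq(void *dev, int irq)
    {
            return request_threaded_irq(irq, example_hardirq, example_thread_fn,
                                        IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
                                        "example", dev);
    }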
10402diff --git a/include/linux/irq.h b/include/linux/irq.h
10403index fdf2c4a..3929bbe 100644
10404--- a/include/linux/irq.h
10405+++ b/include/linux/irq.h
10406@@ -70,6 +70,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data);
10407 * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
10408 * IRQ_NESTED_TRHEAD - Interrupt nests into another thread
10409 * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable
10410+ * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
10411 */
10412 enum {
10413 IRQ_TYPE_NONE = 0x00000000,
10414@@ -94,12 +95,14 @@ enum {
10415 IRQ_NESTED_THREAD = (1 << 15),
10416 IRQ_NOTHREAD = (1 << 16),
10417 IRQ_PER_CPU_DEVID = (1 << 17),
10418+ IRQ_NO_SOFTIRQ_CALL = (1 << 18),
10419 };
10420
10421 #define IRQF_MODIFY_MASK \
10422 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
10423 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
10424- IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID)
10425+ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
10426+ IRQ_NO_SOFTIRQ_CALL)
10427
10428 #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
10429
10430diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
10431index 623325e..a7edc47 100644
10432--- a/include/linux/irqdesc.h
10433+++ b/include/linux/irqdesc.h
10434@@ -52,6 +52,7 @@ struct irq_desc {
10435 unsigned int irq_count; /* For detecting broken IRQs */
10436 unsigned long last_unhandled; /* Aging timer for unhandled count */
10437 unsigned int irqs_unhandled;
10438+ u64 random_ip;
10439 raw_spinlock_t lock;
10440 struct cpumask *percpu_enabled;
10441 #ifdef CONFIG_SMP
10442diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
10443index d176d65..a52b35d 100644
10444--- a/include/linux/irqflags.h
10445+++ b/include/linux/irqflags.h
10446@@ -25,8 +25,6 @@
10447 # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
10448 # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
10449 # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
10450-# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
10451-# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
10452 # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
10453 #else
10454 # define trace_hardirqs_on() do { } while (0)
10455@@ -39,9 +37,15 @@
10456 # define trace_softirqs_enabled(p) 0
10457 # define trace_hardirq_enter() do { } while (0)
10458 # define trace_hardirq_exit() do { } while (0)
10459+# define INIT_TRACE_IRQFLAGS
10460+#endif
10461+
10462+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
10463+# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
10464+# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
10465+#else
10466 # define lockdep_softirq_enter() do { } while (0)
10467 # define lockdep_softirq_exit() do { } while (0)
10468-# define INIT_TRACE_IRQFLAGS
10469 #endif
10470
10471 #if defined(CONFIG_IRQSOFF_TRACER) || \
10472@@ -147,4 +151,23 @@
10473
10474 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
10475
10476+/*
10477+ * local_irq* variants depending on RT/!RT
10478+ */
10479+#ifdef CONFIG_PREEMPT_RT_FULL
10480+# define local_irq_disable_nort() do { } while (0)
10481+# define local_irq_enable_nort() do { } while (0)
10482+# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0)
10483+# define local_irq_restore_nort(flags) do { (void)(flags); } while (0)
10484+# define local_irq_disable_rt() local_irq_disable()
10485+# define local_irq_enable_rt() local_irq_enable()
10486+#else
10487+# define local_irq_disable_nort() local_irq_disable()
10488+# define local_irq_enable_nort() local_irq_enable()
10489+# define local_irq_save_nort(flags) local_irq_save(flags)
10490+# define local_irq_restore_nort(flags) local_irq_restore(flags)
10491+# define local_irq_disable_rt() do { } while (0)
10492+# define local_irq_enable_rt() do { } while (0)
10493+#endif
10494+
10495 #endif
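These _nort/_rt variants are what the earlier driver hunks (usb hcd, qla2xxx, ntfs) switch to: on a stock kernel local_irq_save_nort() is plain local_irq_save(), while on RT it only records the flags and leaves interrupts enabled, since the data involved is protected by sleeping locks there. A small sketch with a made-up register write; the struct, register offset and function are illustrative only.

    #include <linux/io.h>
    #include <linux/irqflags.h>

    #define EXAMPLE_CTRL    0x00            /* made-up register offset */

    struct example_uart {
            void __iomem *regs;
    };

    static void example_touch_hw(struct example_uart *u)
    {
            unsigned long flags;

            /* hard irqs really disabled on !RT; only a flags save on RT */
            local_irq_save_nort(flags);
            writel(0x1, u->regs + EXAMPLE_CTRL);
            local_irq_restore_nort(flags);
    }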
10496diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
10497index 6133679..0dbc151 100644
10498--- a/include/linux/jbd_common.h
10499+++ b/include/linux/jbd_common.h
10500@@ -39,32 +39,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
10501
10502 static inline void jbd_lock_bh_state(struct buffer_head *bh)
10503 {
10504+#ifndef CONFIG_PREEMPT_RT_BASE
10505 bit_spin_lock(BH_State, &bh->b_state);
10506+#else
10507+ spin_lock(&bh->b_state_lock);
10508+#endif
10509 }
10510
10511 static inline int jbd_trylock_bh_state(struct buffer_head *bh)
10512 {
10513+#ifndef CONFIG_PREEMPT_RT_BASE
10514 return bit_spin_trylock(BH_State, &bh->b_state);
10515+#else
10516+ return spin_trylock(&bh->b_state_lock);
10517+#endif
10518 }
10519
10520 static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
10521 {
10522+#ifndef CONFIG_PREEMPT_RT_BASE
10523 return bit_spin_is_locked(BH_State, &bh->b_state);
10524+#else
10525+ return spin_is_locked(&bh->b_state_lock);
10526+#endif
10527 }
10528
10529 static inline void jbd_unlock_bh_state(struct buffer_head *bh)
10530 {
10531+#ifndef CONFIG_PREEMPT_RT_BASE
10532 bit_spin_unlock(BH_State, &bh->b_state);
10533+#else
10534+ spin_unlock(&bh->b_state_lock);
10535+#endif
10536 }
10537
10538 static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
10539 {
10540+#ifndef CONFIG_PREEMPT_RT_BASE
10541 bit_spin_lock(BH_JournalHead, &bh->b_state);
10542+#else
10543+ spin_lock(&bh->b_journal_head_lock);
10544+#endif
10545 }
10546
10547 static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
10548 {
10549+#ifndef CONFIG_PREEMPT_RT_BASE
10550 bit_spin_unlock(BH_JournalHead, &bh->b_state);
10551+#else
10552+ spin_unlock(&bh->b_journal_head_lock);
10553+#endif
10554 }
10555
10556 #endif
10557diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
10558index 82ed068..8fb8edf 100644
10559--- a/include/linux/jiffies.h
10560+++ b/include/linux/jiffies.h
10561@@ -75,7 +75,6 @@ extern int register_refined_jiffies(long clock_tick_rate);
10562 */
10563 extern u64 __jiffy_data jiffies_64;
10564 extern unsigned long volatile __jiffy_data jiffies;
10565-extern seqlock_t jiffies_lock;
10566
10567 #if (BITS_PER_LONG < 64)
10568 u64 get_jiffies_64(void);
10569diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
10570index 0976fc4..40c876b 100644
10571--- a/include/linux/jump_label.h
10572+++ b/include/linux/jump_label.h
10573@@ -50,7 +50,8 @@
10574 #include <linux/compiler.h>
10575 #include <linux/workqueue.h>
10576
10577-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
10578+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && \
10579+ !defined(CONFIG_PREEMPT_BASE)
10580
10581 struct static_key {
10582 atomic_t enabled;
10583diff --git a/include/linux/kdb.h b/include/linux/kdb.h
10584index 7f6fe6e..680ad23 100644
10585--- a/include/linux/kdb.h
10586+++ b/include/linux/kdb.h
10587@@ -115,7 +115,7 @@ extern int kdb_trap_printk;
10588 extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
10589 extern __printf(1, 2) int kdb_printf(const char *, ...);
10590 typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
10591-
10592+#define in_kdb_printk() (kdb_trap_printk)
10593 extern void kdb_init(int level);
10594
10595 /* Access to kdb specific polling devices */
10596@@ -150,6 +150,7 @@ extern int kdb_register_repeat(char *, kdb_func_t, char *, char *,
10597 extern int kdb_unregister(char *);
10598 #else /* ! CONFIG_KGDB_KDB */
10599 static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
10600+#define in_kdb_printk() (0)
10601 static inline void kdb_init(int level) {}
10602 static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
10603 char *help, short minlen) { return 0; }
10604diff --git a/include/linux/kernel.h b/include/linux/kernel.h
10605index c566927..8b3086d 100644
10606--- a/include/linux/kernel.h
10607+++ b/include/linux/kernel.h
10608@@ -412,6 +412,7 @@ extern enum system_states {
10609 SYSTEM_HALT,
10610 SYSTEM_POWER_OFF,
10611 SYSTEM_RESTART,
10612+ SYSTEM_SUSPEND,
10613 } system_state;
10614
10615 #define TAINT_PROPRIETARY_MODULE 0
10616diff --git a/include/linux/lglock.h b/include/linux/lglock.h
10617index 0d24e93..d2c0d6d 100644
10618--- a/include/linux/lglock.h
10619+++ b/include/linux/lglock.h
10620@@ -42,22 +42,37 @@
10621 #endif
10622
10623 struct lglock {
10624+#ifndef CONFIG_PREEMPT_RT_FULL
10625 arch_spinlock_t __percpu *lock;
10626+#else
10627+ struct rt_mutex __percpu *lock;
10628+#endif
10629 #ifdef CONFIG_DEBUG_LOCK_ALLOC
10630 struct lock_class_key lock_key;
10631 struct lockdep_map lock_dep_map;
10632 #endif
10633 };
10634
10635-#define DEFINE_LGLOCK(name) \
10636+#ifndef CONFIG_PREEMPT_RT_FULL
10637+# define DEFINE_LGLOCK(name) \
10638 static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
10639 = __ARCH_SPIN_LOCK_UNLOCKED; \
10640 struct lglock name = { .lock = &name ## _lock }
10641
10642-#define DEFINE_STATIC_LGLOCK(name) \
10643+# define DEFINE_STATIC_LGLOCK(name) \
10644 static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
10645 = __ARCH_SPIN_LOCK_UNLOCKED; \
10646 static struct lglock name = { .lock = &name ## _lock }
10647+#else
10648+
10649+# define DEFINE_LGLOCK(name) \
10650+ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \
10651+ struct lglock name = { .lock = &name ## _lock }
10652+
10653+# define DEFINE_STATIC_LGLOCK(name) \
10654+ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \
10655+ static struct lglock name = { .lock = &name ## _lock }
10656+#endif
10657
10658 void lg_lock_init(struct lglock *lg, char *name);
10659 void lg_local_lock(struct lglock *lg);
10660diff --git a/include/linux/list.h b/include/linux/list.h
10661index cc6d2aa..7a9851b 100644
10662--- a/include/linux/list.h
10663+++ b/include/linux/list.h
10664@@ -362,6 +362,17 @@ static inline void list_splice_tail_init(struct list_head *list,
10665 list_entry((ptr)->next, type, member)
10666
10667 /**
10668+ * list_last_entry - get the last element from a list
10669+ * @ptr: the list head to take the element from.
10670+ * @type: the type of the struct this is embedded in.
10671+ * @member: the name of the list_struct within the struct.
10672+ *
10673+ * Note that the list is expected to be non-empty.
10674+ */
10675+#define list_last_entry(ptr, type, member) \
10676+ list_entry((ptr)->prev, type, member)
10677+
10678+/**
10679 * list_for_each - iterate over a list
10680 * @pos: the &struct list_head to use as a loop cursor.
10681 * @head: the head for your list.
10682diff --git a/include/linux/locallock.h b/include/linux/locallock.h
10683new file mode 100644
10684index 0000000..a5eea5d
10685--- /dev/null
10686+++ b/include/linux/locallock.h
10687@@ -0,0 +1,253 @@
10688+#ifndef _LINUX_LOCALLOCK_H
10689+#define _LINUX_LOCALLOCK_H
10690+
10691+#include <linux/spinlock.h>
10692+
10693+#ifdef CONFIG_PREEMPT_RT_BASE
10694+
10695+#ifdef CONFIG_DEBUG_SPINLOCK
10696+# define LL_WARN(cond) WARN_ON(cond)
10697+#else
10698+# define LL_WARN(cond) do { } while (0)
10699+#endif
10700+
10701+/*
10702+ * per cpu lock based substitute for local_irq_*()
10703+ */
10704+struct local_irq_lock {
10705+ spinlock_t lock;
10706+ struct task_struct *owner;
10707+ int nestcnt;
10708+ unsigned long flags;
10709+};
10710+
10711+#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
10712+ DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
10713+ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
10714+
10715+#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
10716+ DECLARE_PER_CPU(struct local_irq_lock, lvar)
10717+
10718+#define local_irq_lock_init(lvar) \
10719+ do { \
10720+ int __cpu; \
10721+ for_each_possible_cpu(__cpu) \
10722+ spin_lock_init(&per_cpu(lvar, __cpu).lock); \
10723+ } while (0)
10724+
10725+static inline void __local_lock(struct local_irq_lock *lv)
10726+{
10727+ if (lv->owner != current) {
10728+ spin_lock(&lv->lock);
10729+ LL_WARN(lv->owner);
10730+ LL_WARN(lv->nestcnt);
10731+ lv->owner = current;
10732+ }
10733+ lv->nestcnt++;
10734+}
10735+
10736+#define local_lock(lvar) \
10737+ do { __local_lock(&get_local_var(lvar)); } while (0)
10738+
10739+static inline int __local_trylock(struct local_irq_lock *lv)
10740+{
10741+ if (lv->owner != current && spin_trylock(&lv->lock)) {
10742+ LL_WARN(lv->owner);
10743+ LL_WARN(lv->nestcnt);
10744+ lv->owner = current;
10745+ lv->nestcnt = 1;
10746+ return 1;
10747+ }
10748+ return 0;
10749+}
10750+
10751+#define local_trylock(lvar) \
10752+ ({ \
10753+ int __locked; \
10754+ __locked = __local_trylock(&get_local_var(lvar)); \
10755+ if (!__locked) \
10756+ put_local_var(lvar); \
10757+ __locked; \
10758+ })
10759+
10760+static inline void __local_unlock(struct local_irq_lock *lv)
10761+{
10762+ LL_WARN(lv->nestcnt == 0);
10763+ LL_WARN(lv->owner != current);
10764+ if (--lv->nestcnt)
10765+ return;
10766+
10767+ lv->owner = NULL;
10768+ spin_unlock(&lv->lock);
10769+}
10770+
10771+#define local_unlock(lvar) \
10772+ do { \
10773+ __local_unlock(&__get_cpu_var(lvar)); \
10774+ put_local_var(lvar); \
10775+ } while (0)
10776+
10777+static inline void __local_lock_irq(struct local_irq_lock *lv)
10778+{
10779+ spin_lock_irqsave(&lv->lock, lv->flags);
10780+ LL_WARN(lv->owner);
10781+ LL_WARN(lv->nestcnt);
10782+ lv->owner = current;
10783+ lv->nestcnt = 1;
10784+}
10785+
10786+#define local_lock_irq(lvar) \
10787+ do { __local_lock_irq(&get_local_var(lvar)); } while (0)
10788+
10789+#define local_lock_irq_on(lvar, cpu) \
10790+ do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
10791+
10792+static inline void __local_unlock_irq(struct local_irq_lock *lv)
10793+{
10794+ LL_WARN(!lv->nestcnt);
10795+ LL_WARN(lv->owner != current);
10796+ lv->owner = NULL;
10797+ lv->nestcnt = 0;
10798+ spin_unlock_irq(&lv->lock);
10799+}
10800+
10801+#define local_unlock_irq(lvar) \
10802+ do { \
10803+ __local_unlock_irq(&__get_cpu_var(lvar)); \
10804+ put_local_var(lvar); \
10805+ } while (0)
10806+
10807+#define local_unlock_irq_on(lvar, cpu) \
10808+ do { \
10809+ __local_unlock_irq(&per_cpu(lvar, cpu)); \
10810+ } while (0)
10811+
10812+static inline int __local_lock_irqsave(struct local_irq_lock *lv)
10813+{
10814+ if (lv->owner != current) {
10815+ __local_lock_irq(lv);
10816+ return 0;
10817+ } else {
10818+ lv->nestcnt++;
10819+ return 1;
10820+ }
10821+}
10822+
10823+#define local_lock_irqsave(lvar, _flags) \
10824+ do { \
10825+ if (__local_lock_irqsave(&get_local_var(lvar))) \
10826+ put_local_var(lvar); \
10827+ _flags = __get_cpu_var(lvar).flags; \
10828+ } while (0)
10829+
10830+#define local_lock_irqsave_on(lvar, _flags, cpu) \
10831+ do { \
10832+ __local_lock_irqsave(&per_cpu(lvar, cpu)); \
10833+ _flags = per_cpu(lvar, cpu).flags; \
10834+ } while (0)
10835+
10836+static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
10837+ unsigned long flags)
10838+{
10839+ LL_WARN(!lv->nestcnt);
10840+ LL_WARN(lv->owner != current);
10841+ if (--lv->nestcnt)
10842+ return 0;
10843+
10844+ lv->owner = NULL;
10845+ spin_unlock_irqrestore(&lv->lock, lv->flags);
10846+ return 1;
10847+}
10848+
10849+#define local_unlock_irqrestore(lvar, flags) \
10850+ do { \
10851+ if (__local_unlock_irqrestore(&__get_cpu_var(lvar), flags)) \
10852+ put_local_var(lvar); \
10853+ } while (0)
10854+
10855+#define local_unlock_irqrestore_on(lvar, flags, cpu) \
10856+ do { \
10857+ __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
10858+ } while (0)
10859+
10860+#define local_spin_trylock_irq(lvar, lock) \
10861+ ({ \
10862+ int __locked; \
10863+ local_lock_irq(lvar); \
10864+ __locked = spin_trylock(lock); \
10865+ if (!__locked) \
10866+ local_unlock_irq(lvar); \
10867+ __locked; \
10868+ })
10869+
10870+#define local_spin_lock_irq(lvar, lock) \
10871+ do { \
10872+ local_lock_irq(lvar); \
10873+ spin_lock(lock); \
10874+ } while (0)
10875+
10876+#define local_spin_unlock_irq(lvar, lock) \
10877+ do { \
10878+ spin_unlock(lock); \
10879+ local_unlock_irq(lvar); \
10880+ } while (0)
10881+
10882+#define local_spin_lock_irqsave(lvar, lock, flags) \
10883+ do { \
10884+ local_lock_irqsave(lvar, flags); \
10885+ spin_lock(lock); \
10886+ } while (0)
10887+
10888+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
10889+ do { \
10890+ spin_unlock(lock); \
10891+ local_unlock_irqrestore(lvar, flags); \
10892+ } while (0)
10893+
10894+#define get_locked_var(lvar, var) \
10895+ (*({ \
10896+ local_lock(lvar); \
10897+ &__get_cpu_var(var); \
10898+ }))
10899+
10900+#define put_locked_var(lvar, var) local_unlock(lvar)
10901+
10902+#define local_lock_cpu(lvar) \
10903+ ({ \
10904+ local_lock(lvar); \
10905+ smp_processor_id(); \
10906+ })
10907+
10908+#define local_unlock_cpu(lvar) local_unlock(lvar)
10909+
10910+#else /* PREEMPT_RT_BASE */
10911+
10912+#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
10913+#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
10914+
10915+static inline void local_irq_lock_init(int lvar) { }
10916+
10917+#define local_lock(lvar) preempt_disable()
10918+#define local_unlock(lvar) preempt_enable()
10919+#define local_lock_irq(lvar) local_irq_disable()
10920+#define local_unlock_irq(lvar) local_irq_enable()
10921+#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
10922+#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
10923+
10924+#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
10925+#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
10926+#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
10927+#define local_spin_lock_irqsave(lvar, lock, flags) \
10928+ spin_lock_irqsave(lock, flags)
10929+#define local_spin_unlock_irqrestore(lvar, lock, flags) \
10930+ spin_unlock_irqrestore(lock, flags)
10931+
10932+#define get_locked_var(lvar, var) get_cpu_var(var)
10933+#define put_locked_var(lvar, var) put_cpu_var(var)
10934+
10935+#define local_lock_cpu(lvar) get_cpu()
10936+#define local_unlock_cpu(lvar) put_cpu()
10937+
10938+#endif
10939+
10940+#endif
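
For orientation, a minimal sketch of how a caller uses the local-lock API introduced by this new header; the per-CPU variable, lock and function names below are illustrative only, while the macros are the ones defined in locallock.h above.

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_count);
static DEFINE_LOCAL_IRQ_LOCK(example_lock);

static void example_count_inc(void)
{
	unsigned long flags;

	/*
	 * On !PREEMPT_RT_BASE this compiles down to local_irq_save();
	 * on RT it takes a per-CPU sleeping spinlock and leaves
	 * interrupts enabled.
	 */
	local_lock_irqsave(example_lock, flags);
	__this_cpu_inc(example_count);
	local_unlock_irqrestore(example_lock, flags);
}

Callers keep a single code path; the two behaviours are selected entirely by the macros above.
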
10941diff --git a/include/linux/mm.h b/include/linux/mm.h
10942index 9568b90..e3b3a15 100644
10943--- a/include/linux/mm.h
10944+++ b/include/linux/mm.h
10945@@ -1259,27 +1259,59 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
10946 * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
10947 * When freeing, reset page->mapping so free_pages_check won't complain.
10948 */
10949+#ifndef CONFIG_PREEMPT_RT_FULL
10950+
10951 #define __pte_lockptr(page) &((page)->ptl)
10952-#define pte_lock_init(_page) do { \
10953- spin_lock_init(__pte_lockptr(_page)); \
10954-} while (0)
10955+
10956+static inline struct page *pte_lock_init(struct page *page)
10957+{
10958+ spin_lock_init(__pte_lockptr(page));
10959+ return page;
10960+}
10961+
10962 #define pte_lock_deinit(page) ((page)->mapping = NULL)
10963+
10964+#else /* !PREEMPT_RT_FULL */
10965+
10966+/*
10967+ * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the
10968+ * page frame, hence it only has a pointer and we need to dynamically
10969+ * allocate the lock when we allocate PTE-pages.
10970+ *
10971+ * This is an overall win, since only a small fraction of the pages
10972+ * will be PTE pages under normal circumstances.
10973+ */
10974+
10975+#define __pte_lockptr(page) ((page)->ptl)
10976+
10977+extern struct page *pte_lock_init(struct page *page);
10978+extern void pte_lock_deinit(struct page *page);
10979+
10980+#endif /* PREEMPT_RT_FULL */
10981+
10982 #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
10983 #else /* !USE_SPLIT_PTLOCKS */
10984 /*
10985 * We use mm->page_table_lock to guard all pagetable pages of the mm.
10986 */
10987-#define pte_lock_init(page) do {} while (0)
10988+static inline struct page *pte_lock_init(struct page *page) { return page; }
10989 #define pte_lock_deinit(page) do {} while (0)
10990 #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
10991 #endif /* USE_SPLIT_PTLOCKS */
10992
10993-static inline void pgtable_page_ctor(struct page *page)
10994+static inline struct page *__pgtable_page_ctor(struct page *page)
10995 {
10996- pte_lock_init(page);
10997- inc_zone_page_state(page, NR_PAGETABLE);
10998+ page = pte_lock_init(page);
10999+ if (page)
11000+ inc_zone_page_state(page, NR_PAGETABLE);
11001+ return page;
11002 }
11003
11004+#define pgtable_page_ctor(page) \
11005+do { \
11006+ page = __pgtable_page_ctor(page); \
11007+} while (0)
11008+
11009 static inline void pgtable_page_dtor(struct page *page)
11010 {
11011 pte_lock_deinit(page);
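
The hunk above only declares the out-of-line RT helpers pte_lock_init()/pte_lock_deinit(); their definitions live elsewhere in this patch. As a rough sketch of the idea (assuming the lock is kmalloc'ed per PTE page and an allocation failure is reported by returning NULL, which __pgtable_page_ctor() then propagates), the RT variants could look like:

#include <linux/mm.h>
#include <linux/slab.h>

/* Illustrative sketch of the PREEMPT_RT_FULL variants, not the patch's code. */
struct page *pte_lock_init(struct page *page)
{
	page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
	if (!page->ptl)
		return NULL;	/* caller then skips NR_PAGETABLE accounting */
	spin_lock_init(page->ptl);
	return page;
}

void pte_lock_deinit(struct page *page)
{
	kfree(page->ptl);
	page->mapping = NULL;
}
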
11012diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
11013index f8f5162..6270199 100644
11014--- a/include/linux/mm_types.h
11015+++ b/include/linux/mm_types.h
11016@@ -11,6 +11,7 @@
11017 #include <linux/completion.h>
11018 #include <linux/cpumask.h>
11019 #include <linux/page-debug-flags.h>
11020+#include <linux/rcupdate.h>
11021 #include <linux/uprobes.h>
11022 #include <asm/page.h>
11023 #include <asm/mmu.h>
11024@@ -141,7 +142,11 @@ struct page {
11025 * system if PG_buddy is set.
11026 */
11027 #if USE_SPLIT_PTLOCKS
11028+# ifndef CONFIG_PREEMPT_RT_FULL
11029 spinlock_t ptl;
11030+# else
11031+ spinlock_t *ptl;
11032+# endif
11033 #endif
11034 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
11035 struct page *first_page; /* Compound tail pages */
11036@@ -436,6 +441,9 @@ struct mm_struct {
11037 int first_nid;
11038 #endif
11039 struct uprobes_state uprobes_state;
11040+#ifdef CONFIG_PREEMPT_RT_BASE
11041+ struct rcu_head delayed_drop;
11042+#endif
11043 };
11044
11045 /* first nid will either be a valid NID or one of these values */
11046diff --git a/include/linux/mutex.h b/include/linux/mutex.h
11047index 9121595..bdf1da2 100644
11048--- a/include/linux/mutex.h
11049+++ b/include/linux/mutex.h
11050@@ -17,6 +17,17 @@
11051
11052 #include <linux/atomic.h>
11053
11054+#ifdef CONFIG_DEBUG_LOCK_ALLOC
11055+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
11056+ , .dep_map = { .name = #lockname }
11057+#else
11058+# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
11059+#endif
11060+
11061+#ifdef CONFIG_PREEMPT_RT_FULL
11062+# include <linux/mutex_rt.h>
11063+#else
11064+
11065 /*
11066 * Simple, straightforward mutexes with strict semantics:
11067 *
11068@@ -95,13 +106,6 @@ do { \
11069 static inline void mutex_destroy(struct mutex *lock) {}
11070 #endif
11071
11072-#ifdef CONFIG_DEBUG_LOCK_ALLOC
11073-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
11074- , .dep_map = { .name = #lockname }
11075-#else
11076-# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
11077-#endif
11078-
11079 #define __MUTEX_INITIALIZER(lockname) \
11080 { .count = ATOMIC_INIT(1) \
11081 , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
11082@@ -167,6 +171,9 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
11083 */
11084 extern int mutex_trylock(struct mutex *lock);
11085 extern void mutex_unlock(struct mutex *lock);
11086+
11087+#endif /* !PREEMPT_RT_FULL */
11088+
11089 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
11090
11091 #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
11092diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
11093new file mode 100644
11094index 0000000..c38a44b
11095--- /dev/null
11096+++ b/include/linux/mutex_rt.h
11097@@ -0,0 +1,84 @@
11098+#ifndef __LINUX_MUTEX_RT_H
11099+#define __LINUX_MUTEX_RT_H
11100+
11101+#ifndef __LINUX_MUTEX_H
11102+#error "Please include mutex.h"
11103+#endif
11104+
11105+#include <linux/rtmutex.h>
11106+
11107+/* FIXME: Just for __lockfunc */
11108+#include <linux/spinlock.h>
11109+
11110+struct mutex {
11111+ struct rt_mutex lock;
11112+#ifdef CONFIG_DEBUG_LOCK_ALLOC
11113+ struct lockdep_map dep_map;
11114+#endif
11115+};
11116+
11117+#define __MUTEX_INITIALIZER(mutexname) \
11118+ { \
11119+ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
11120+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
11121+ }
11122+
11123+#define DEFINE_MUTEX(mutexname) \
11124+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
11125+
11126+extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
11127+extern void __lockfunc _mutex_lock(struct mutex *lock);
11128+extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
11129+extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
11130+extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
11131+extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
11132+extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
11133+extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
11134+extern int __lockfunc _mutex_trylock(struct mutex *lock);
11135+extern void __lockfunc _mutex_unlock(struct mutex *lock);
11136+
11137+#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
11138+#define mutex_lock(l) _mutex_lock(l)
11139+#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
11140+#define mutex_lock_killable(l) _mutex_lock_killable(l)
11141+#define mutex_trylock(l) _mutex_trylock(l)
11142+#define mutex_unlock(l) _mutex_unlock(l)
11143+#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
11144+
11145+#ifdef CONFIG_DEBUG_LOCK_ALLOC
11146+# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
11147+# define mutex_lock_interruptible_nested(l, s) \
11148+ _mutex_lock_interruptible_nested(l, s)
11149+# define mutex_lock_killable_nested(l, s) \
11150+ _mutex_lock_killable_nested(l, s)
11151+
11152+# define mutex_lock_nest_lock(lock, nest_lock) \
11153+do { \
11154+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
11155+ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
11156+} while (0)
11157+
11158+#else
11159+# define mutex_lock_nested(l, s) _mutex_lock(l)
11160+# define mutex_lock_interruptible_nested(l, s) \
11161+ _mutex_lock_interruptible(l)
11162+# define mutex_lock_killable_nested(l, s) \
11163+ _mutex_lock_killable(l)
11164+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
11165+#endif
11166+
11167+# define mutex_init(mutex) \
11168+do { \
11169+ static struct lock_class_key __key; \
11170+ \
11171+ rt_mutex_init(&(mutex)->lock); \
11172+ __mutex_do_init((mutex), #mutex, &__key); \
11173+} while (0)
11174+
11175+# define __mutex_init(mutex, name, key) \
11176+do { \
11177+ rt_mutex_init(&(mutex)->lock); \
11178+ __mutex_do_init((mutex), name, key); \
11179+} while (0)
11180+
11181+#endif
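
mutex_rt.h keeps the regular mutex API and only swaps the backing primitive for an rt_mutex, so callers are unchanged; an illustrative user (hypothetical names):

#include <linux/mutex.h>

static DEFINE_MUTEX(example_mutex);	/* rt_mutex-backed on PREEMPT_RT_FULL */
static int example_state;

static void example_update(int v)
{
	mutex_lock(&example_mutex);	/* expands to _mutex_lock() here */
	example_state = v;
	mutex_unlock(&example_mutex);	/* expands to _mutex_unlock() */
}

Priority inheritance comes from the underlying rt_mutex, and lockdep keys are still set up through mutex_init()/__mutex_init() as shown above.
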
11182diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
11183index 0e182f9..0b58fd6 100644
11184--- a/include/linux/netdevice.h
11185+++ b/include/linux/netdevice.h
11186@@ -1579,7 +1579,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
11187
11188 extern rwlock_t dev_base_lock; /* Device list lock */
11189
11190-extern seqcount_t devnet_rename_seq; /* Device rename seq */
11191+extern struct mutex devnet_rename_mutex;
11192
11193
11194 #define for_each_netdev(net, d) \
11195@@ -1783,6 +1783,7 @@ struct softnet_data {
11196 unsigned int dropped;
11197 struct sk_buff_head input_pkt_queue;
11198 struct napi_struct backlog;
11199+ struct sk_buff_head tofree_queue;
11200 };
11201
11202 static inline void input_queue_head_incr(struct softnet_data *sd)
11203diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
11204index dd49566..7d083af 100644
11205--- a/include/linux/netfilter/x_tables.h
11206+++ b/include/linux/netfilter/x_tables.h
11207@@ -3,6 +3,7 @@
11208
11209
11210 #include <linux/netdevice.h>
11211+#include <linux/locallock.h>
11212 #include <uapi/linux/netfilter/x_tables.h>
11213
11214 /**
11215@@ -284,6 +285,8 @@ extern void xt_free_table_info(struct xt_table_info *info);
11216 */
11217 DECLARE_PER_CPU(seqcount_t, xt_recseq);
11218
11219+DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
11220+
11221 /**
11222 * xt_write_recseq_begin - start of a write section
11223 *
11224@@ -298,6 +301,9 @@ static inline unsigned int xt_write_recseq_begin(void)
11225 {
11226 unsigned int addend;
11227
11228+ /* RT protection */
11229+ local_lock(xt_write_lock);
11230+
11231 /*
11232 * Low order bit of sequence is set if we already
11233 * called xt_write_recseq_begin().
11234@@ -328,6 +334,7 @@ static inline void xt_write_recseq_end(unsigned int addend)
11235 /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
11236 smp_wmb();
11237 __this_cpu_add(xt_recseq.sequence, addend);
11238+ local_unlock(xt_write_lock);
11239 }
11240
11241 /*
11242diff --git a/include/linux/notifier.h b/include/linux/notifier.h
11243index d65746e..6bfd703 100644
11244--- a/include/linux/notifier.h
11245+++ b/include/linux/notifier.h
11246@@ -42,9 +42,7 @@
11247 * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
11248 * As compensation, srcu_notifier_chain_unregister() is rather expensive.
11249 * SRCU notifier chains should be used when the chain will be called very
11250- * often but notifier_blocks will seldom be removed. Also, SRCU notifier
11251- * chains are slightly more difficult to use because they require special
11252- * runtime initialization.
11253+ * often but notifier_blocks will seldom be removed.
11254 */
11255
11256 struct notifier_block {
11257@@ -85,7 +83,7 @@ struct srcu_notifier_head {
11258 (name)->head = NULL; \
11259 } while (0)
11260
11261-/* srcu_notifier_heads must be initialized and cleaned up dynamically */
11262+/* srcu_notifier_heads must be cleaned up dynamically */
11263 extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
11264 #define srcu_cleanup_notifier_head(name) \
11265 cleanup_srcu_struct(&(name)->srcu);
11266@@ -98,7 +96,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
11267 .head = NULL }
11268 #define RAW_NOTIFIER_INIT(name) { \
11269 .head = NULL }
11270-/* srcu_notifier_heads cannot be initialized statically */
11271+
11272+#define SRCU_NOTIFIER_INIT(name, pcpu) \
11273+ { \
11274+ .mutex = __MUTEX_INITIALIZER(name.mutex), \
11275+ .head = NULL, \
11276+ .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
11277+ }
11278
11279 #define ATOMIC_NOTIFIER_HEAD(name) \
11280 struct atomic_notifier_head name = \
11281@@ -110,6 +114,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
11282 struct raw_notifier_head name = \
11283 RAW_NOTIFIER_INIT(name)
11284
11285+#define _SRCU_NOTIFIER_HEAD(name, mod) \
11286+ static DEFINE_PER_CPU(struct srcu_struct_array, \
11287+ name##_head_srcu_array); \
11288+ mod struct srcu_notifier_head name = \
11289+ SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
11290+
11291+#define SRCU_NOTIFIER_HEAD(name) \
11292+ _SRCU_NOTIFIER_HEAD(name, )
11293+
11294+#define SRCU_NOTIFIER_HEAD_STATIC(name) \
11295+ _SRCU_NOTIFIER_HEAD(name, static)
11296+
11297 #ifdef __KERNEL__
11298
11299 extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
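
With SRCU_NOTIFIER_INIT and the SRCU_NOTIFIER_HEAD* macros added above, an SRCU notifier chain can now be defined statically instead of calling srcu_init_notifier_head() at runtime. A hypothetical user:

#include <linux/notifier.h>

SRCU_NOTIFIER_HEAD_STATIC(example_chain);	/* statically initialized */

int example_register(struct notifier_block *nb)
{
	return srcu_notifier_chain_register(&example_chain, nb);
}

int example_notify(unsigned long event, void *data)
{
	return srcu_notifier_call_chain(&example_chain, event, data);
}
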
11300diff --git a/include/linux/of.h b/include/linux/of.h
11301index 5ebcc5c..bb35c42 100644
11302--- a/include/linux/of.h
11303+++ b/include/linux/of.h
11304@@ -92,7 +92,7 @@ static inline void of_node_put(struct device_node *node) { }
11305 extern struct device_node *of_allnodes;
11306 extern struct device_node *of_chosen;
11307 extern struct device_node *of_aliases;
11308-extern rwlock_t devtree_lock;
11309+extern raw_spinlock_t devtree_lock;
11310
11311 static inline bool of_have_populated_dt(void)
11312 {
11313diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
11314new file mode 100644
11315index 0000000..364dda7
11316--- /dev/null
11317+++ b/include/linux/of_dma.h
11318@@ -0,0 +1,72 @@
11319+/*
11320+ * OF helpers for DMA request / controller
11321+ *
11322+ * Based on of_gpio.h
11323+ *
11324+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
11325+ *
11326+ * This program is free software; you can redistribute it and/or modify
11327+ * it under the terms of the GNU General Public License version 2 as
11328+ * published by the Free Software Foundation.
11329+ */
11330+
11331+#ifndef __LINUX_OF_DMA_H
11332+#define __LINUX_OF_DMA_H
11333+
11334+#include <linux/of.h>
11335+#include <linux/dmaengine.h>
11336+
11337+struct device_node;
11338+
11339+struct of_dma {
11340+ struct list_head of_dma_controllers;
11341+ struct device_node *of_node;
11342+ int of_dma_nbcells;
11343+ struct dma_chan *(*of_dma_xlate)
11344+ (struct of_phandle_args *, struct of_dma *);
11345+ void *of_dma_data;
11346+};
11347+
11348+struct of_dma_filter_info {
11349+ dma_cap_mask_t dma_cap;
11350+ dma_filter_fn filter_fn;
11351+};
11352+
11353+#ifdef CONFIG_OF
11354+extern int of_dma_controller_register(struct device_node *np,
11355+ struct dma_chan *(*of_dma_xlate)
11356+ (struct of_phandle_args *, struct of_dma *),
11357+ void *data);
11358+extern void of_dma_controller_free(struct device_node *np);
11359+extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
11360+ const char *name);
11361+extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
11362+ struct of_dma *ofdma);
11363+#else
11364+static inline int of_dma_controller_register(struct device_node *np,
11365+ struct dma_chan *(*of_dma_xlate)
11366+ (struct of_phandle_args *, struct of_dma *),
11367+ void *data)
11368+{
11369+ return -ENODEV;
11370+}
11371+
11372+static inline void of_dma_controller_free(struct device_node *np)
11373+{
11374+}
11375+
11376+static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
11377+ const char *name)
11378+{
11379+ return NULL;
11380+}
11381+
11382+static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
11383+ struct of_dma *ofdma)
11384+{
11385+ return NULL;
11386+}
11387+
11388+#endif
11389+
11390+#endif /* __LINUX_OF_DMA_H */
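
The new of_dma.h covers both sides of the device-tree DMA binding: a controller registers a translation callback, and client drivers look up channels by name from their node. A minimal hypothetical client:

#include <linux/dmaengine.h>
#include <linux/of.h>
#include <linux/of_dma.h>

/* Request the "rx" DMA channel described in this device's DT node;
 * returns NULL when no matching channel is available. */
static struct dma_chan *example_get_rx_chan(struct device_node *np)
{
	return of_dma_request_slave_channel(np, "rx");
}

A controller driver would typically pair of_dma_controller_register() in probe with of_dma_controller_free() in remove, passing of_dma_simple_xlate() plus a struct of_dma_filter_info as the translation hook.
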
11391diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
11392index 777a524..ca67e80 100644
11393--- a/include/linux/page_cgroup.h
11394+++ b/include/linux/page_cgroup.h
11395@@ -24,6 +24,9 @@ enum {
11396 */
11397 struct page_cgroup {
11398 unsigned long flags;
11399+#ifdef CONFIG_PREEMPT_RT_BASE
11400+ spinlock_t pcg_lock;
11401+#endif
11402 struct mem_cgroup *mem_cgroup;
11403 };
11404
11405@@ -74,12 +77,20 @@ static inline void lock_page_cgroup(struct page_cgroup *pc)
11406 * Don't take this lock in IRQ context.
11407 * This lock is for pc->mem_cgroup, USED, MIGRATION
11408 */
11409+#ifndef CONFIG_PREEMPT_RT_BASE
11410 bit_spin_lock(PCG_LOCK, &pc->flags);
11411+#else
11412+ spin_lock(&pc->pcg_lock);
11413+#endif
11414 }
11415
11416 static inline void unlock_page_cgroup(struct page_cgroup *pc)
11417 {
11418+#ifndef CONFIG_PREEMPT_RT_BASE
11419 bit_spin_unlock(PCG_LOCK, &pc->flags);
11420+#else
11421+ spin_unlock(&pc->pcg_lock);
11422+#endif
11423 }
11424
11425 #else /* CONFIG_MEMCG */
11426@@ -102,6 +113,10 @@ static inline void __init page_cgroup_init_flatmem(void)
11427 {
11428 }
11429
11430+static inline void page_cgroup_lock_init(struct page_cgroup *pc)
11431+{
11432+}
11433+
11434 #endif /* CONFIG_MEMCG */
11435
11436 #include <linux/swap.h>
11437diff --git a/include/linux/percpu.h b/include/linux/percpu.h
11438index cc88172..12b394f 100644
11439--- a/include/linux/percpu.h
11440+++ b/include/linux/percpu.h
11441@@ -48,6 +48,31 @@
11442 preempt_enable(); \
11443 } while (0)
11444
11445+#ifndef CONFIG_PREEMPT_RT_FULL
11446+# define get_local_var(var) get_cpu_var(var)
11447+# define put_local_var(var) put_cpu_var(var)
11448+# define get_local_ptr(var) get_cpu_ptr(var)
11449+# define put_local_ptr(var) put_cpu_ptr(var)
11450+#else
11451+# define get_local_var(var) (*({ \
11452+ migrate_disable(); \
11453+ &__get_cpu_var(var); }))
11454+
11455+# define put_local_var(var) do { \
11456+ (void)&(var); \
11457+ migrate_enable(); \
11458+} while (0)
11459+
11460+# define get_local_ptr(var) ({ \
11461+ migrate_disable(); \
11462+ this_cpu_ptr(var); })
11463+
11464+# define put_local_ptr(var) do { \
11465+ (void)(var); \
11466+ migrate_enable(); \
11467+} while (0)
11468+#endif
11469+
11470 /* minimum unit size, also is the maximum supported allocation size */
11471 #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
11472
11473diff --git a/include/linux/pid.h b/include/linux/pid.h
11474index 2381c97..3b67343 100644
11475--- a/include/linux/pid.h
11476+++ b/include/linux/pid.h
11477@@ -2,6 +2,7 @@
11478 #define _LINUX_PID_H
11479
11480 #include <linux/rcupdate.h>
11481+#include <linux/atomic.h>
11482
11483 enum pid_type
11484 {
11485diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
11486index 24368a2..bb3cd58 100644
11487--- a/include/linux/platform_data/cpsw.h
11488+++ b/include/linux/platform_data/cpsw.h
11489@@ -21,6 +21,8 @@ struct cpsw_slave_data {
11490 char phy_id[MII_BUS_ID_SIZE];
11491 int phy_if;
11492 u8 mac_addr[ETH_ALEN];
11493+ u16 dual_emac_res_vlan; /* Reserved VLAN for DualEMAC */
11494+
11495 };
11496
11497 struct cpsw_platform_data {
11498@@ -28,13 +30,15 @@ struct cpsw_platform_data {
11499 u32 channels; /* number of cpdma channels (symmetric) */
11500 u32 slaves; /* number of slave cpgmac ports */
11501 struct cpsw_slave_data *slave_data;
11502- u32 cpts_active_slave; /* time stamping slave */
11503+ u32 active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
11504 u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */
11505 u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */
11506 u32 ale_entries; /* ale table size */
11507 u32 bd_ram_size; /*buffer descriptor ram size */
11508 u32 rx_descs; /* Number of Rx Descriptors */
11509 u32 mac_control; /* Mac control register */
11510+ u16 default_vlan; /* Def VLAN for ALE lookup in VLAN aware mode*/
11511+ bool dual_emac; /* Enable Dual EMAC mode */
11512 };
11513
11514 #endif /* __CPSW_H__ */
11515diff --git a/include/linux/preempt.h b/include/linux/preempt.h
11516index 87a03c7..a7f4212 100644
11517--- a/include/linux/preempt.h
11518+++ b/include/linux/preempt.h
11519@@ -23,15 +23,38 @@
11520
11521 #define preempt_count() (current_thread_info()->preempt_count)
11522
11523+#ifdef CONFIG_PREEMPT_LAZY
11524+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
11525+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
11526+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
11527+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
11528+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
11529+#else
11530+#define add_preempt_lazy_count(val) do { } while (0)
11531+#define sub_preempt_lazy_count(val) do { } while (0)
11532+#define inc_preempt_lazy_count() do { } while (0)
11533+#define dec_preempt_lazy_count() do { } while (0)
11534+#define preempt_lazy_count() (0)
11535+#endif
11536+
11537 #ifdef CONFIG_PREEMPT
11538
11539 asmlinkage void preempt_schedule(void);
11540
11541+# ifdef CONFIG_PREEMPT_LAZY
11542 #define preempt_check_resched() \
11543 do { \
11544- if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
11545+ if (unlikely(test_thread_flag(TIF_NEED_RESCHED) || \
11546+ test_thread_flag(TIF_NEED_RESCHED_LAZY))) \
11547 preempt_schedule(); \
11548 } while (0)
11549+# else
11550+#define preempt_check_resched() \
11551+do { \
11552+ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
11553+ preempt_schedule(); \
11554+} while (0)
11555+# endif
11556
11557 #else /* !CONFIG_PREEMPT */
11558
11559@@ -48,17 +71,36 @@ do { \
11560 barrier(); \
11561 } while (0)
11562
11563+#define preempt_lazy_disable() \
11564+do { \
11565+ inc_preempt_lazy_count(); \
11566+ barrier(); \
11567+} while (0)
11568+
11569 #define sched_preempt_enable_no_resched() \
11570 do { \
11571 barrier(); \
11572 dec_preempt_count(); \
11573 } while (0)
11574
11575-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
11576+#ifndef CONFIG_PREEMPT_RT_BASE
11577+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
11578+# define preempt_check_resched_rt() barrier()
11579+#else
11580+# define preempt_enable_no_resched() preempt_enable()
11581+# define preempt_check_resched_rt() preempt_check_resched()
11582+#endif
11583
11584 #define preempt_enable() \
11585 do { \
11586- preempt_enable_no_resched(); \
11587+ sched_preempt_enable_no_resched(); \
11588+ barrier(); \
11589+ preempt_check_resched(); \
11590+} while (0)
11591+
11592+#define preempt_lazy_enable() \
11593+do { \
11594+ dec_preempt_lazy_count(); \
11595 barrier(); \
11596 preempt_check_resched(); \
11597 } while (0)
11598@@ -107,9 +149,31 @@ do { \
11599 #define preempt_disable_notrace() barrier()
11600 #define preempt_enable_no_resched_notrace() barrier()
11601 #define preempt_enable_notrace() barrier()
11602+#define preempt_check_resched_rt() barrier()
11603
11604 #endif /* CONFIG_PREEMPT_COUNT */
11605
11606+#ifdef CONFIG_PREEMPT_RT_FULL
11607+# define preempt_disable_rt() preempt_disable()
11608+# define preempt_enable_rt() preempt_enable()
11609+# define preempt_disable_nort() barrier()
11610+# define preempt_enable_nort() barrier()
11611+# ifdef CONFIG_SMP
11612+ extern void migrate_disable(void);
11613+ extern void migrate_enable(void);
11614+# else /* CONFIG_SMP */
11615+# define migrate_disable() barrier()
11616+# define migrate_enable() barrier()
11617+# endif /* CONFIG_SMP */
11618+#else
11619+# define preempt_disable_rt() barrier()
11620+# define preempt_enable_rt() barrier()
11621+# define preempt_disable_nort() preempt_disable()
11622+# define preempt_enable_nort() preempt_enable()
11623+# define migrate_disable() preempt_disable()
11624+# define migrate_enable() preempt_enable()
11625+#endif
11626+
11627 #ifdef CONFIG_PREEMPT_NOTIFIERS
11628
11629 struct preempt_notifier;
11630diff --git a/include/linux/printk.h b/include/linux/printk.h
11631index 9afc01e..812d102 100644
11632--- a/include/linux/printk.h
11633+++ b/include/linux/printk.h
11634@@ -95,8 +95,16 @@ int no_printk(const char *fmt, ...)
11635 return 0;
11636 }
11637
11638+#ifdef CONFIG_EARLY_PRINTK
11639 extern asmlinkage __printf(1, 2)
11640 void early_printk(const char *fmt, ...);
11641+void early_vprintk(const char *fmt, va_list ap);
11642+extern void printk_kill(void);
11643+#else
11644+static inline __printf(1, 2) __cold
11645+void early_printk(const char *s, ...) { }
11646+static inline void printk_kill(void) { }
11647+#endif
11648
11649 extern int printk_needs_cpu(int cpu);
11650 extern void printk_tick(void);
11651@@ -132,7 +140,6 @@ extern int __printk_ratelimit(const char *func);
11652 #define printk_ratelimit() __printk_ratelimit(__func__)
11653 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
11654 unsigned int interval_msec);
11655-
11656 extern int printk_delay_msec;
11657 extern int dmesg_restrict;
11658 extern int kptr_restrict;
11659diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
11660index ffc444c..7ddfbf9 100644
11661--- a/include/linux/radix-tree.h
11662+++ b/include/linux/radix-tree.h
11663@@ -230,7 +230,13 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
11664 unsigned long index, unsigned long max_scan);
11665 unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
11666 unsigned long index, unsigned long max_scan);
11667+
11668+#ifndef CONFIG_PREEMPT_RT_FULL
11669 int radix_tree_preload(gfp_t gfp_mask);
11670+#else
11671+static inline int radix_tree_preload(gfp_t gm) { return 0; }
11672+#endif
11673+
11674 void radix_tree_init(void);
11675 void *radix_tree_tag_set(struct radix_tree_root *root,
11676 unsigned long index, unsigned int tag);
11677@@ -255,7 +261,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
11678
11679 static inline void radix_tree_preload_end(void)
11680 {
11681- preempt_enable();
11682+ preempt_enable_nort();
11683 }
11684
11685 /**
11686diff --git a/include/linux/random.h b/include/linux/random.h
11687index d984608..f975382 100644
11688--- a/include/linux/random.h
11689+++ b/include/linux/random.h
11690@@ -12,7 +12,7 @@
11691 extern void add_device_randomness(const void *, unsigned int);
11692 extern void add_input_randomness(unsigned int type, unsigned int code,
11693 unsigned int value);
11694-extern void add_interrupt_randomness(int irq, int irq_flags);
11695+extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
11696
11697 extern void get_random_bytes(void *buf, int nbytes);
11698 extern void get_random_bytes_arch(void *buf, int nbytes);
11699diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
11700index 275aa3f..33e1d2e 100644
11701--- a/include/linux/rcupdate.h
11702+++ b/include/linux/rcupdate.h
11703@@ -120,6 +120,9 @@ extern void call_rcu(struct rcu_head *head,
11704
11705 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
11706
11707+#ifdef CONFIG_PREEMPT_RT_FULL
11708+#define call_rcu_bh call_rcu
11709+#else
11710 /**
11711 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
11712 * @head: structure to be used for queueing the RCU updates.
11713@@ -143,6 +146,7 @@ extern void call_rcu(struct rcu_head *head,
11714 */
11715 extern void call_rcu_bh(struct rcu_head *head,
11716 void (*func)(struct rcu_head *head));
11717+#endif
11718
11719 /**
11720 * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
11721@@ -182,6 +186,11 @@ void synchronize_rcu(void);
11722 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
11723 */
11724 #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
11725+#ifndef CONFIG_PREEMPT_RT_FULL
11726+#define sched_rcu_preempt_depth() rcu_preempt_depth()
11727+#else
11728+static inline int sched_rcu_preempt_depth(void) { return 0; }
11729+#endif
11730
11731 #else /* #ifdef CONFIG_PREEMPT_RCU */
11732
11733@@ -205,6 +214,8 @@ static inline int rcu_preempt_depth(void)
11734 return 0;
11735 }
11736
11737+#define sched_rcu_preempt_depth() rcu_preempt_depth()
11738+
11739 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
11740
11741 /* Internal to kernel */
11742@@ -359,7 +370,14 @@ static inline int rcu_read_lock_held(void)
11743 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
11744 * hell.
11745 */
11746+#ifdef CONFIG_PREEMPT_RT_FULL
11747+static inline int rcu_read_lock_bh_held(void)
11748+{
11749+ return rcu_read_lock_held();
11750+}
11751+#else
11752 extern int rcu_read_lock_bh_held(void);
11753+#endif
11754
11755 /**
11756 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
11757@@ -807,10 +825,14 @@ static inline void rcu_read_unlock(void)
11758 static inline void rcu_read_lock_bh(void)
11759 {
11760 local_bh_disable();
11761+#ifdef CONFIG_PREEMPT_RT_FULL
11762+ rcu_read_lock();
11763+#else
11764 __acquire(RCU_BH);
11765 rcu_lock_acquire(&rcu_bh_lock_map);
11766 rcu_lockdep_assert(!rcu_is_cpu_idle(),
11767 "rcu_read_lock_bh() used illegally while idle");
11768+#endif
11769 }
11770
11771 /*
11772@@ -820,10 +842,14 @@ static inline void rcu_read_lock_bh(void)
11773 */
11774 static inline void rcu_read_unlock_bh(void)
11775 {
11776+#ifdef CONFIG_PREEMPT_RT_FULL
11777+ rcu_read_unlock();
11778+#else
11779 rcu_lockdep_assert(!rcu_is_cpu_idle(),
11780 "rcu_read_unlock_bh() used illegally while idle");
11781 rcu_lock_release(&rcu_bh_lock_map);
11782 __release(RCU_BH);
11783+#endif
11784 local_bh_enable();
11785 }
11786
11787diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
11788index 952b793..f1472a2 100644
11789--- a/include/linux/rcutree.h
11790+++ b/include/linux/rcutree.h
11791@@ -45,7 +45,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
11792 rcu_note_context_switch(cpu);
11793 }
11794
11795+#ifdef CONFIG_PREEMPT_RT_FULL
11796+# define synchronize_rcu_bh synchronize_rcu
11797+#else
11798 extern void synchronize_rcu_bh(void);
11799+#endif
11800 extern void synchronize_sched_expedited(void);
11801 extern void synchronize_rcu_expedited(void);
11802
11803@@ -73,20 +77,30 @@ static inline void synchronize_rcu_bh_expedited(void)
11804 }
11805
11806 extern void rcu_barrier(void);
11807+#ifdef CONFIG_PREEMPT_RT_FULL
11808+# define rcu_barrier_bh rcu_barrier
11809+#else
11810 extern void rcu_barrier_bh(void);
11811+#endif
11812 extern void rcu_barrier_sched(void);
11813
11814 extern unsigned long rcutorture_testseq;
11815 extern unsigned long rcutorture_vernum;
11816 extern long rcu_batches_completed(void);
11817-extern long rcu_batches_completed_bh(void);
11818 extern long rcu_batches_completed_sched(void);
11819
11820 extern void rcu_force_quiescent_state(void);
11821-extern void rcu_bh_force_quiescent_state(void);
11822 extern void rcu_sched_force_quiescent_state(void);
11823
11824 extern void rcu_scheduler_starting(void);
11825 extern int rcu_scheduler_active __read_mostly;
11826
11827+#ifndef CONFIG_PREEMPT_RT_FULL
11828+extern void rcu_bh_force_quiescent_state(void);
11829+extern long rcu_batches_completed_bh(void);
11830+#else
11831+# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
11832+# define rcu_batches_completed_bh rcu_batches_completed
11833+#endif
11834+
11835 #endif /* __LINUX_RCUTREE_H */
11836diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
11837index de17134..5ebd0bb 100644
11838--- a/include/linux/rtmutex.h
11839+++ b/include/linux/rtmutex.h
11840@@ -14,7 +14,7 @@
11841
11842 #include <linux/linkage.h>
11843 #include <linux/plist.h>
11844-#include <linux/spinlock_types.h>
11845+#include <linux/spinlock_types_raw.h>
11846
11847 extern int max_lock_depth; /* for sysctl */
11848
11849@@ -29,9 +29,10 @@ struct rt_mutex {
11850 raw_spinlock_t wait_lock;
11851 struct plist_head wait_list;
11852 struct task_struct *owner;
11853-#ifdef CONFIG_DEBUG_RT_MUTEXES
11854 int save_state;
11855- const char *name, *file;
11856+#ifdef CONFIG_DEBUG_RT_MUTEXES
11857+ const char *file;
11858+ const char *name;
11859 int line;
11860 void *magic;
11861 #endif
11862@@ -56,19 +57,39 @@ struct hrtimer_sleeper;
11863 #ifdef CONFIG_DEBUG_RT_MUTEXES
11864 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
11865 , .name = #mutexname, .file = __FILE__, .line = __LINE__
11866-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
11867+
11868+# define rt_mutex_init(mutex) \
11869+ do { \
11870+ raw_spin_lock_init(&(mutex)->wait_lock); \
11871+ __rt_mutex_init(mutex, #mutex); \
11872+ } while (0)
11873+
11874 extern void rt_mutex_debug_task_free(struct task_struct *tsk);
11875 #else
11876 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
11877-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
11878+
11879+# define rt_mutex_init(mutex) \
11880+ do { \
11881+ raw_spin_lock_init(&(mutex)->wait_lock); \
11882+ __rt_mutex_init(mutex, #mutex); \
11883+ } while (0)
11884+
11885 # define rt_mutex_debug_task_free(t) do { } while (0)
11886 #endif
11887
11888-#define __RT_MUTEX_INITIALIZER(mutexname) \
11889- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
11890+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
11891+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
11892 , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
11893 , .owner = NULL \
11894- __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
11895+ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
11896+
11897+
11898+#define __RT_MUTEX_INITIALIZER(mutexname) \
11899+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
11900+
11901+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
11902+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
11903+ , .save_state = 1 }
11904
11905 #define DEFINE_RT_MUTEX(mutexname) \
11906 struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
11907@@ -90,6 +111,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock);
11908 extern void rt_mutex_lock(struct rt_mutex *lock);
11909 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock,
11910 int detect_deadlock);
11911+extern int rt_mutex_lock_killable(struct rt_mutex *lock, int detect_deadlock);
11912 extern int rt_mutex_timed_lock(struct rt_mutex *lock,
11913 struct hrtimer_sleeper *timeout,
11914 int detect_deadlock);
11915diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
11916index bc2994e..d6cfc0a 100644
11917--- a/include/linux/rwlock.h
11918+++ b/include/linux/rwlock.h
11919@@ -24,8 +24,11 @@ do { \
11920 __rwlock_init((lock), #lock, &__key); \
11921 } while (0)
11922 #else
11923+
11924+# ifndef rwlock_init
11925 # define rwlock_init(lock) \
11926 do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0)
11927+# endif
11928 #endif
11929
11930 #ifdef CONFIG_DEBUG_SPINLOCK
11931@@ -38,88 +41,173 @@ do { \
11932 extern int do_raw_write_trylock(rwlock_t *lock);
11933 extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
11934 #else
11935+
11936+# ifndef do_raw_read_lock
11937 # define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
11938+# endif
11939+
11940+# ifndef do_raw_read_lock_flags
11941 # define do_raw_read_lock_flags(lock, flags) \
11942 do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
11943+# endif
11944+
11945+# ifndef do_raw_read_trylock
11946 # define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock)
11947+# endif
11948+
11949+# ifndef do_raw_read_unlock
11950 # define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
11951+# endif
11952+
11953+# ifndef do_raw_write_lock
11954 # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
11955+# endif
11956+
11957+# ifndef do_raw_write_lock_flags
11958 # define do_raw_write_lock_flags(lock, flags) \
11959 do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
11960+# endif
11961+
11962+# ifndef do_raw_write_trylock
11963 # define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock)
11964+# endif
11965+
11966+# ifndef do_raw_write_unlock
11967 # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
11968+# endif
11969+
11970 #endif
11971
11972+#ifndef read_can_lock
11973 #define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock)
11974+#endif
11975+
11976+#ifndef write_can_lock
11977 #define write_can_lock(rwlock) arch_write_can_lock(&(rwlock)->raw_lock)
11978+#endif
11979
11980 /*
11981 * Define the various rw_lock methods. Note we define these
11982 * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
11983 * methods are defined as nops in the case they are not required.
11984 */
11985+#ifndef read_trylock
11986 #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock))
11987+#endif
11988+
11989+#ifndef write_trylock
11990 #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock))
11991+#endif
11992
11993+#ifndef write_lock
11994 #define write_lock(lock) _raw_write_lock(lock)
11995+#endif
11996+
11997+#ifndef read_lock
11998 #define read_lock(lock) _raw_read_lock(lock)
11999+#endif
12000
12001 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
12002
12003+#ifndef read_lock_irqsave
12004 #define read_lock_irqsave(lock, flags) \
12005 do { \
12006 typecheck(unsigned long, flags); \
12007 flags = _raw_read_lock_irqsave(lock); \
12008 } while (0)
12009+#endif
12010+
12011+#ifndef write_lock_irqsave
12012 #define write_lock_irqsave(lock, flags) \
12013 do { \
12014 typecheck(unsigned long, flags); \
12015 flags = _raw_write_lock_irqsave(lock); \
12016 } while (0)
12017+#endif
12018
12019 #else
12020
12021+#ifndef read_lock_irqsave
12022 #define read_lock_irqsave(lock, flags) \
12023 do { \
12024 typecheck(unsigned long, flags); \
12025 _raw_read_lock_irqsave(lock, flags); \
12026 } while (0)
12027+#endif
12028+
12029+#ifndef write_lock_irqsave
12030 #define write_lock_irqsave(lock, flags) \
12031 do { \
12032 typecheck(unsigned long, flags); \
12033 _raw_write_lock_irqsave(lock, flags); \
12034 } while (0)
12035+#endif
12036
12037 #endif
12038
12039+#ifndef read_lock_irq
12040 #define read_lock_irq(lock) _raw_read_lock_irq(lock)
12041+#endif
12042+
12043+#ifndef read_lock_bh
12044 #define read_lock_bh(lock) _raw_read_lock_bh(lock)
12045+#endif
12046+
12047+#ifndef write_lock_irq
12048 #define write_lock_irq(lock) _raw_write_lock_irq(lock)
12049+#endif
12050+
12051+#ifndef write_lock_bh
12052 #define write_lock_bh(lock) _raw_write_lock_bh(lock)
12053+#endif
12054+
12055+#ifndef read_unlock
12056 #define read_unlock(lock) _raw_read_unlock(lock)
12057+#endif
12058+
12059+#ifndef write_unlock
12060 #define write_unlock(lock) _raw_write_unlock(lock)
12061+#endif
12062+
12063+#ifndef read_unlock_irq
12064 #define read_unlock_irq(lock) _raw_read_unlock_irq(lock)
12065+#endif
12066+
12067+#ifndef write_unlock_irq
12068 #define write_unlock_irq(lock) _raw_write_unlock_irq(lock)
12069+#endif
12070
12071+#ifndef read_unlock_irqrestore
12072 #define read_unlock_irqrestore(lock, flags) \
12073 do { \
12074 typecheck(unsigned long, flags); \
12075 _raw_read_unlock_irqrestore(lock, flags); \
12076 } while (0)
12077+#endif
12078+
12079+#ifndef read_unlock_bh
12080 #define read_unlock_bh(lock) _raw_read_unlock_bh(lock)
12081+#endif
12082
12083+#ifndef write_unlock_irqrestore
12084 #define write_unlock_irqrestore(lock, flags) \
12085 do { \
12086 typecheck(unsigned long, flags); \
12087 _raw_write_unlock_irqrestore(lock, flags); \
12088 } while (0)
12089+#endif
12090+
12091+#ifndef write_unlock_bh
12092 #define write_unlock_bh(lock) _raw_write_unlock_bh(lock)
12093+#endif
12094
12095+#ifndef write_trylock_irqsave
12096 #define write_trylock_irqsave(lock, flags) \
12097 ({ \
12098 local_irq_save(flags); \
12099 write_trylock(lock) ? \
12100 1 : ({ local_irq_restore(flags); 0; }); \
12101 })
12102+#endif
12103
12104 #endif /* __LINUX_RWLOCK_H */
12105diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
12106new file mode 100644
12107index 0000000..853ee36
12108--- /dev/null
12109+++ b/include/linux/rwlock_rt.h
12110@@ -0,0 +1,123 @@
12111+#ifndef __LINUX_RWLOCK_RT_H
12112+#define __LINUX_RWLOCK_RT_H
12113+
12114+#ifndef __LINUX_SPINLOCK_H
12115+#error Do not include directly. Use spinlock.h
12116+#endif
12117+
12118+#define rwlock_init(rwl) \
12119+do { \
12120+ static struct lock_class_key __key; \
12121+ \
12122+ rt_mutex_init(&(rwl)->lock); \
12123+ __rt_rwlock_init(rwl, #rwl, &__key); \
12124+} while (0)
12125+
12126+extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
12127+extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
12128+extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
12129+extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
12130+extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
12131+extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
12132+extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
12133+extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
12134+extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
12135+extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
12136+
12137+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
12138+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
12139+
12140+#define write_trylock_irqsave(lock, flags) \
12141+ __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
12142+
12143+#define read_lock_irqsave(lock, flags) \
12144+ do { \
12145+ typecheck(unsigned long, flags); \
12146+ migrate_disable(); \
12147+ flags = rt_read_lock_irqsave(lock); \
12148+ } while (0)
12149+
12150+#define write_lock_irqsave(lock, flags) \
12151+ do { \
12152+ typecheck(unsigned long, flags); \
12153+ migrate_disable(); \
12154+ flags = rt_write_lock_irqsave(lock); \
12155+ } while (0)
12156+
12157+#define read_lock(lock) \
12158+ do { \
12159+ migrate_disable(); \
12160+ rt_read_lock(lock); \
12161+ } while (0)
12162+
12163+#define read_lock_bh(lock) \
12164+ do { \
12165+ local_bh_disable(); \
12166+ migrate_disable(); \
12167+ rt_read_lock(lock); \
12168+ } while (0)
12169+
12170+#define read_lock_irq(lock) read_lock(lock)
12171+
12172+#define write_lock(lock) \
12173+ do { \
12174+ migrate_disable(); \
12175+ rt_write_lock(lock); \
12176+ } while (0)
12177+
12178+#define write_lock_bh(lock) \
12179+ do { \
12180+ local_bh_disable(); \
12181+ migrate_disable(); \
12182+ rt_write_lock(lock); \
12183+ } while (0)
12184+
12185+#define write_lock_irq(lock) write_lock(lock)
12186+
12187+#define read_unlock(lock) \
12188+ do { \
12189+ rt_read_unlock(lock); \
12190+ migrate_enable(); \
12191+ } while (0)
12192+
12193+#define read_unlock_bh(lock) \
12194+ do { \
12195+ rt_read_unlock(lock); \
12196+ migrate_enable(); \
12197+ local_bh_enable(); \
12198+ } while (0)
12199+
12200+#define read_unlock_irq(lock) read_unlock(lock)
12201+
12202+#define write_unlock(lock) \
12203+ do { \
12204+ rt_write_unlock(lock); \
12205+ migrate_enable(); \
12206+ } while (0)
12207+
12208+#define write_unlock_bh(lock) \
12209+ do { \
12210+ rt_write_unlock(lock); \
12211+ migrate_enable(); \
12212+ local_bh_enable(); \
12213+ } while (0)
12214+
12215+#define write_unlock_irq(lock) write_unlock(lock)
12216+
12217+#define read_unlock_irqrestore(lock, flags) \
12218+ do { \
12219+ typecheck(unsigned long, flags); \
12220+ (void) flags; \
12221+ rt_read_unlock(lock); \
12222+ migrate_enable(); \
12223+ } while (0)
12224+
12225+#define write_unlock_irqrestore(lock, flags) \
12226+ do { \
12227+ typecheck(unsigned long, flags); \
12228+ (void) flags; \
12229+ rt_write_unlock(lock); \
12230+ migrate_enable(); \
12231+ } while (0)
12232+
12233+#endif
12234diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
12235index cc0072e..d0da966 100644
12236--- a/include/linux/rwlock_types.h
12237+++ b/include/linux/rwlock_types.h
12238@@ -1,6 +1,10 @@
12239 #ifndef __LINUX_RWLOCK_TYPES_H
12240 #define __LINUX_RWLOCK_TYPES_H
12241
12242+#if !defined(__LINUX_SPINLOCK_TYPES_H)
12243+# error "Do not include directly, include spinlock_types.h"
12244+#endif
12245+
12246 /*
12247 * include/linux/rwlock_types.h - generic rwlock type definitions
12248 * and initializers
12249@@ -43,6 +47,7 @@ typedef struct {
12250 RW_DEP_MAP_INIT(lockname) }
12251 #endif
12252
12253-#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
12254+#define DEFINE_RWLOCK(name) \
12255+ rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
12256
12257 #endif /* __LINUX_RWLOCK_TYPES_H */
12258diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h
12259new file mode 100644
12260index 0000000..b138321
12261--- /dev/null
12262+++ b/include/linux/rwlock_types_rt.h
12263@@ -0,0 +1,33 @@
12264+#ifndef __LINUX_RWLOCK_TYPES_RT_H
12265+#define __LINUX_RWLOCK_TYPES_RT_H
12266+
12267+#ifndef __LINUX_SPINLOCK_TYPES_H
12268+#error "Do not include directly. Include spinlock_types.h instead"
12269+#endif
12270+
12271+/*
12272+ * rwlocks - rtmutex which allows single reader recursion
12273+ */
12274+typedef struct {
12275+ struct rt_mutex lock;
12276+ int read_depth;
12277+ unsigned int break_lock;
12278+#ifdef CONFIG_DEBUG_LOCK_ALLOC
12279+ struct lockdep_map dep_map;
12280+#endif
12281+} rwlock_t;
12282+
12283+#ifdef CONFIG_DEBUG_LOCK_ALLOC
12284+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
12285+#else
12286+# define RW_DEP_MAP_INIT(lockname)
12287+#endif
12288+
12289+#define __RW_LOCK_UNLOCKED(name) \
12290+ { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
12291+ RW_DEP_MAP_INIT(name) }
12292+
12293+#define DEFINE_RWLOCK(name) \
12294+ rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
12295+
12296+#endif
12297diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
12298index 8da67d6..f994bd3 100644
12299--- a/include/linux/rwsem.h
12300+++ b/include/linux/rwsem.h
12301@@ -16,6 +16,10 @@
12302
12303 #include <linux/atomic.h>
12304
12305+#ifdef CONFIG_PREEMPT_RT_FULL
12306+#include <linux/rwsem_rt.h>
12307+#else /* PREEMPT_RT_FULL */
12308+
12309 struct rw_semaphore;
12310
12311 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
12312@@ -139,4 +143,6 @@ do { \
12313 # define down_write_nested(sem, subclass) down_write(sem)
12314 #endif
12315
12316+#endif /* !PREEMPT_RT_FULL */
12317+
12318 #endif /* _LINUX_RWSEM_H */
12319diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h
12320new file mode 100644
12321index 0000000..e94d945
12322--- /dev/null
12323+++ b/include/linux/rwsem_rt.h
12324@@ -0,0 +1,128 @@
12325+#ifndef _LINUX_RWSEM_RT_H
12326+#define _LINUX_RWSEM_RT_H
12327+
12328+#ifndef _LINUX_RWSEM_H
12329+#error "Include rwsem.h"
12330+#endif
12331+
12332+/*
12333+ * RW-semaphores are a spinlock plus a reader-depth count.
12334+ *
12335+ * Linux rw-sems: in PREEMPT_RT mode we do not allow
12336+ * multiple readers to hold the lock at once; we only allow
12337+ * multiple readers to hold the lock at once, we only allow
12338+ * a read-lock owner to read-lock recursively. This is
12339+ * better for latency, makes the implementation inherently
12340+ * fair and makes it simpler as well.
12341+ */
12342+
12343+#include <linux/rtmutex.h>
12344+
12345+struct rw_semaphore {
12346+ struct rt_mutex lock;
12347+ int read_depth;
12348+#ifdef CONFIG_DEBUG_LOCK_ALLOC
12349+ struct lockdep_map dep_map;
12350+#endif
12351+};
12352+
12353+#define __RWSEM_INITIALIZER(name) \
12354+ { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
12355+ RW_DEP_MAP_INIT(name) }
12356+
12357+#define DECLARE_RWSEM(lockname) \
12358+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
12359+
12360+extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
12361+ struct lock_class_key *key);
12362+
12363+#define __rt_init_rwsem(sem, name, key) \
12364+ do { \
12365+ rt_mutex_init(&(sem)->lock); \
12366+ __rt_rwsem_init((sem), (name), (key));\
12367+ } while (0)
12368+
12369+#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
12370+
12371+# define rt_init_rwsem(sem) \
12372+do { \
12373+ static struct lock_class_key __key; \
12374+ \
12375+ __rt_init_rwsem((sem), #sem, &__key); \
12376+} while (0)
12377+
12378+extern void rt_down_write(struct rw_semaphore *rwsem);
12379+extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
12380+extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
12381+extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
12382+ struct lockdep_map *nest);
12383+extern void rt_down_read(struct rw_semaphore *rwsem);
12384+extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
12385+extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
12386+extern void rt_up_read(struct rw_semaphore *rwsem);
12387+extern void rt_up_write(struct rw_semaphore *rwsem);
12388+extern void rt_downgrade_write(struct rw_semaphore *rwsem);
12389+
12390+#define init_rwsem(sem) rt_init_rwsem(sem)
12391+#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
12392+
12393+static inline void down_read(struct rw_semaphore *sem)
12394+{
12395+ rt_down_read(sem);
12396+}
12397+
12398+static inline int down_read_trylock(struct rw_semaphore *sem)
12399+{
12400+ return rt_down_read_trylock(sem);
12401+}
12402+
12403+static inline void down_write(struct rw_semaphore *sem)
12404+{
12405+ rt_down_write(sem);
12406+}
12407+
12408+static inline int down_write_trylock(struct rw_semaphore *sem)
12409+{
12410+ return rt_down_write_trylock(sem);
12411+}
12412+
12413+static inline void up_read(struct rw_semaphore *sem)
12414+{
12415+ rt_up_read(sem);
12416+}
12417+
12418+static inline void up_write(struct rw_semaphore *sem)
12419+{
12420+ rt_up_write(sem);
12421+}
12422+
12423+static inline void downgrade_write(struct rw_semaphore *sem)
12424+{
12425+ rt_downgrade_write(sem);
12426+}
12427+
12428+static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
12429+{
12430+ return rt_down_read_nested(sem, subclass);
12431+}
12432+
12433+static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
12434+{
12435+ rt_down_write_nested(sem, subclass);
12436+}
12437+#ifdef CONFIG_DEBUG_LOCK_ALLOC
12438+static inline void down_write_nest_lock(struct rw_semaphore *sem,
12439+ struct rw_semaphore *nest_lock)
12440+{
12441+ rt_down_write_nested_lock(sem, &nest_lock->dep_map);
12442+}
12443+
12444+#else
12445+
12446+static inline void down_write_nest_lock(struct rw_semaphore *sem,
12447+ struct rw_semaphore *nest_lock)
12448+{
12449+ rt_down_write_nested_lock(sem, NULL);
12450+}
12451+#endif
12452+#endif
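
rt_init_rwsem() above declares `static struct lock_class_key __key;` inside its do/while body, so every expansion site gets its own key object and therefore its own lockdep class. A small userspace demonstration of that per-call-site-static idiom (INIT_SITE_KEY() and the plain int keys are made-up stand-ins for the lockdep machinery):

    #include <stdio.h>

    /* Each expansion embeds its own static object, so two different source
     * locations hand back two different addresses -- the same trick that
     * gives lockdep one lock_class_key per init call site. */
    #define INIT_SITE_KEY(out) \
        do { \
            static int __key; \
            (out) = &__key; \
        } while (0)

    static int *init_a(void) { int *k; INIT_SITE_KEY(k); return k; }
    static int *init_b(void) { int *k; INIT_SITE_KEY(k); return k; }

    int main(void)
    {
        printf("site A key %p, site B key %p, A again %p\n",
               (void *)init_a(), (void *)init_b(), (void *)init_a());
        /* A and B differ; repeated calls of A return the same address. */
        return 0;
    }
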
12453diff --git a/include/linux/sched.h b/include/linux/sched.h
12454index 7e49270..8c4837b 100644
12455--- a/include/linux/sched.h
12456+++ b/include/linux/sched.h
12457@@ -23,6 +23,7 @@ struct sched_param {
12458 #include <linux/nodemask.h>
12459 #include <linux/mm_types.h>
12460
12461+#include <asm/kmap_types.h>
12462 #include <asm/page.h>
12463 #include <asm/ptrace.h>
12464 #include <asm/cputime.h>
12465@@ -51,6 +52,7 @@ struct sched_param {
12466 #include <linux/cred.h>
12467 #include <linux/llist.h>
12468 #include <linux/uidgid.h>
12469+#include <linux/hardirq.h>
12470
12471 #include <asm/processor.h>
12472
12473@@ -1062,6 +1064,7 @@ struct sched_domain;
12474 #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
12475 #define WF_FORK 0x02 /* child wakeup after fork */
12476 #define WF_MIGRATED 0x04 /* internal use, task got migrated */
12477+#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
12478
12479 #define ENQUEUE_WAKEUP 1
12480 #define ENQUEUE_HEAD 2
12481@@ -1238,6 +1241,7 @@ enum perf_event_task_context {
12482
12483 struct task_struct {
12484 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
12485+ volatile long saved_state; /* saved state for "spinlock sleepers" */
12486 void *stack;
12487 atomic_t usage;
12488 unsigned int flags; /* per process flags, defined below */
12489@@ -1277,6 +1281,12 @@ struct task_struct {
12490 #endif
12491
12492 unsigned int policy;
12493+#ifdef CONFIG_PREEMPT_RT_FULL
12494+ int migrate_disable;
12495+# ifdef CONFIG_SCHED_DEBUG
12496+ int migrate_disable_atomic;
12497+# endif
12498+#endif
12499 int nr_cpus_allowed;
12500 cpumask_t cpus_allowed;
12501
12502@@ -1377,6 +1387,9 @@ struct task_struct {
12503
12504 struct task_cputime cputime_expires;
12505 struct list_head cpu_timers[3];
12506+#ifdef CONFIG_PREEMPT_RT_BASE
12507+ struct task_struct *posix_timer_list;
12508+#endif
12509
12510 /* process credentials */
12511 const struct cred __rcu *real_cred; /* objective and real subjective task
12512@@ -1408,10 +1421,15 @@ struct task_struct {
12513 /* signal handlers */
12514 struct signal_struct *signal;
12515 struct sighand_struct *sighand;
12516+ struct sigqueue *sigqueue_cache;
12517
12518 sigset_t blocked, real_blocked;
12519 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
12520 struct sigpending pending;
12521+#ifdef CONFIG_PREEMPT_RT_FULL
12522+ /* TODO: move me into ->restart_block ? */
12523+ struct siginfo forced_info;
12524+#endif
12525
12526 unsigned long sas_ss_sp;
12527 size_t sas_ss_size;
12528@@ -1448,6 +1466,9 @@ struct task_struct {
12529 /* mutex deadlock detection */
12530 struct mutex_waiter *blocked_on;
12531 #endif
12532+#ifdef CONFIG_PREEMPT_RT_FULL
12533+ int pagefault_disabled;
12534+#endif
12535 #ifdef CONFIG_TRACE_IRQFLAGS
12536 unsigned int irq_events;
12537 unsigned long hardirq_enable_ip;
12538@@ -1590,6 +1611,12 @@ struct task_struct {
12539 unsigned long trace;
12540 /* bitmask and counter of trace recursion */
12541 unsigned long trace_recursion;
12542+#ifdef CONFIG_WAKEUP_LATENCY_HIST
12543+ u64 preempt_timestamp_hist;
12544+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
12545+ long timer_offset;
12546+#endif
12547+#endif
12548 #endif /* CONFIG_TRACING */
12549 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
12550 struct memcg_batch_info {
12551@@ -1606,11 +1633,22 @@ struct task_struct {
12552 #ifdef CONFIG_UPROBES
12553 struct uprobe_task *utask;
12554 #endif
12555+#ifdef CONFIG_PREEMPT_RT_BASE
12556+ struct rcu_head put_rcu;
12557+ int softirq_nestcnt;
12558+ unsigned int softirqs_raised;
12559+#endif
12560+#ifdef CONFIG_PREEMPT_RT_FULL
12561+# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
12562+ int kmap_idx;
12563+ pte_t kmap_pte[KM_TYPE_NR];
12564+# endif
12565+#endif
12566+#ifdef CONFIG_DEBUG_PREEMPT
12567+ unsigned long preempt_disable_ip;
12568+#endif
12569 };
12570
12571-/* Future-safe accessor for struct task_struct's cpus_allowed. */
12572-#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
12573-
12574 #ifdef CONFIG_NUMA_BALANCING
12575 extern void task_numa_fault(int node, int pages, bool migrated);
12576 extern void set_numabalancing_state(bool enabled);
12577@@ -1623,6 +1661,17 @@ static inline void set_numabalancing_state(bool enabled)
12578 }
12579 #endif
12580
12581+#ifdef CONFIG_PREEMPT_RT_FULL
12582+static inline bool cur_pf_disabled(void) { return current->pagefault_disabled; }
12583+#else
12584+static inline bool cur_pf_disabled(void) { return false; }
12585+#endif
12586+
12587+static inline bool pagefault_disabled(void)
12588+{
12589+ return in_atomic() || cur_pf_disabled();
12590+}
12591+
12592 /*
12593 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
12594 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
12595@@ -1785,6 +1834,15 @@ extern struct pid *cad_pid;
12596 extern void free_task(struct task_struct *tsk);
12597 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
12598
12599+#ifdef CONFIG_PREEMPT_RT_BASE
12600+extern void __put_task_struct_cb(struct rcu_head *rhp);
12601+
12602+static inline void put_task_struct(struct task_struct *t)
12603+{
12604+ if (atomic_dec_and_test(&t->usage))
12605+ call_rcu(&t->put_rcu, __put_task_struct_cb);
12606+}
12607+#else
12608 extern void __put_task_struct(struct task_struct *t);
12609
12610 static inline void put_task_struct(struct task_struct *t)
12611@@ -1792,6 +1850,7 @@ static inline void put_task_struct(struct task_struct *t)
12612 if (atomic_dec_and_test(&t->usage))
12613 __put_task_struct(t);
12614 }
12615+#endif
12616
12617 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
12618 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
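
The PREEMPT_RT_BASE put_task_struct() above no longer frees the task from whichever context drops the last reference; it only queues the task through call_rcu() and lets __put_task_struct_cb() do the work later. A single-threaded userspace sketch of that deferral pattern (struct obj, put_obj() and reclaim_deferred() are invented names; the real code uses RCU callbacks and atomic refcounts):

    #include <stdio.h>
    #include <stdlib.h>

    struct obj {
        int refcount;
        struct obj *next_free;  /* plays the role of the rcu_head */
    };

    static struct obj *deferred_list;

    static void put_obj(struct obj *o)
    {
        if (--o->refcount == 0) {
            o->next_free = deferred_list;   /* defer, don't free here */
            deferred_list = o;
        }
    }

    static void reclaim_deferred(void)      /* analogue of the RCU callback */
    {
        while (deferred_list) {
            struct obj *o = deferred_list;

            deferred_list = o->next_free;
            free(o);
        }
    }

    int main(void)
    {
        struct obj *o = calloc(1, sizeof(*o));

        o->refcount = 2;
        put_obj(o);
        put_obj(o);             /* last put: queued, not freed */
        printf("deferred free pending: %s\n", deferred_list ? "yes" : "no");
        reclaim_deferred();     /* freed later, in a context that may do work */
        return 0;
    }
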
12619@@ -1799,6 +1858,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
12620 /*
12621 * Per process flags
12622 */
12623+#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
12624 #define PF_EXITING 0x00000004 /* getting shut down */
12625 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
12626 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
12627@@ -1816,6 +1876,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
12628 #define PF_FROZEN 0x00010000 /* frozen for system suspend */
12629 #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
12630 #define PF_KSWAPD 0x00040000 /* I am kswapd */
12631+#define PF_STOMPER 0x00080000 /* I am a stomp machine thread */
12632 #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
12633 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
12634 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
12635@@ -1922,6 +1983,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
12636
12637 extern int set_cpus_allowed_ptr(struct task_struct *p,
12638 const struct cpumask *new_mask);
12639+int migrate_me(void);
12640+void tell_sched_cpu_down_begin(int cpu);
12641+void tell_sched_cpu_down_done(int cpu);
12642+
12643 #else
12644 static inline void do_set_cpus_allowed(struct task_struct *p,
12645 const struct cpumask *new_mask)
12646@@ -1934,6 +1999,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
12647 return -EINVAL;
12648 return 0;
12649 }
12650+static inline int migrate_me(void) { return 0; }
12651+static inline void tell_sched_cpu_down_begin(int cpu) { }
12652+static inline void tell_sched_cpu_down_done(int cpu) { }
12653 #endif
12654
12655 #ifdef CONFIG_NO_HZ
12656@@ -2108,6 +2176,7 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
12657 #ifdef CONFIG_RT_MUTEXES
12658 extern int rt_mutex_getprio(struct task_struct *p);
12659 extern void rt_mutex_setprio(struct task_struct *p, int prio);
12660+extern int rt_mutex_check_prio(struct task_struct *task, int newprio);
12661 extern void rt_mutex_adjust_pi(struct task_struct *p);
12662 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
12663 {
12664@@ -2118,6 +2187,10 @@ static inline int rt_mutex_getprio(struct task_struct *p)
12665 {
12666 return p->normal_prio;
12667 }
12668+static inline int rt_mutex_check_prio(struct task_struct *task, int newprio)
12669+{
12670+ return 0;
12671+}
12672 # define rt_mutex_adjust_pi(p) do { } while (0)
12673 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
12674 {
12675@@ -2209,6 +2282,7 @@ extern void xtime_update(unsigned long ticks);
12676
12677 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
12678 extern int wake_up_process(struct task_struct *tsk);
12679+extern int wake_up_lock_sleeper(struct task_struct * tsk);
12680 extern void wake_up_new_task(struct task_struct *tsk);
12681 #ifdef CONFIG_SMP
12682 extern void kick_process(struct task_struct *tsk);
12683@@ -2313,12 +2387,24 @@ extern struct mm_struct * mm_alloc(void);
12684
12685 /* mmdrop drops the mm and the page tables */
12686 extern void __mmdrop(struct mm_struct *);
12687+
12688 static inline void mmdrop(struct mm_struct * mm)
12689 {
12690 if (unlikely(atomic_dec_and_test(&mm->mm_count)))
12691 __mmdrop(mm);
12692 }
12693
12694+#ifdef CONFIG_PREEMPT_RT_BASE
12695+extern void __mmdrop_delayed(struct rcu_head *rhp);
12696+static inline void mmdrop_delayed(struct mm_struct *mm)
12697+{
12698+ if (atomic_dec_and_test(&mm->mm_count))
12699+ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
12700+}
12701+#else
12702+# define mmdrop_delayed(mm) mmdrop(mm)
12703+#endif
12704+
12705 /* mmput gets rid of the mappings and all user-space */
12706 extern void mmput(struct mm_struct *);
12707 /* Grab a reference to a task's mm, if it is not already going away */
12708@@ -2611,6 +2697,52 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
12709 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
12710 }
12711
12712+#ifdef CONFIG_PREEMPT_LAZY
12713+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
12714+{
12715+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
12716+}
12717+
12718+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
12719+{
12720+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
12721+}
12722+
12723+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
12724+{
12725+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
12726+}
12727+
12728+static inline int need_resched_lazy(void)
12729+{
12730+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
12731+}
12732+
12733+static inline int need_resched_now(void)
12734+{
12735+ return test_thread_flag(TIF_NEED_RESCHED);
12736+}
12737+
12738+static inline int need_resched(void)
12739+{
12740+ return test_thread_flag(TIF_NEED_RESCHED) ||
12741+ test_thread_flag(TIF_NEED_RESCHED_LAZY);
12742+}
12743+#else
12744+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
12745+static inline int need_resched_lazy(void) { return 0; }
12746+
12747+static inline int need_resched_now(void)
12748+{
12749+ return test_thread_flag(TIF_NEED_RESCHED);
12750+}
12751+
12752+static inline int need_resched(void)
12753+{
12754+ return test_thread_flag(TIF_NEED_RESCHED);
12755+}
12756+#endif
12757+
12758 static inline int restart_syscall(void)
12759 {
12760 set_tsk_thread_flag(current, TIF_SIGPENDING);
12761@@ -2642,11 +2774,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
12762 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
12763 }
12764
12765-static inline int need_resched(void)
12766-{
12767- return unlikely(test_thread_flag(TIF_NEED_RESCHED));
12768-}
12769-
12770 /*
12771 * cond_resched() and cond_resched_lock(): latency reduction via
12772 * explicit rescheduling in places that are safe. The return
12773@@ -2663,7 +2790,7 @@ extern int _cond_resched(void);
12774
12775 extern int __cond_resched_lock(spinlock_t *lock);
12776
12777-#ifdef CONFIG_PREEMPT_COUNT
12778+#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
12779 #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
12780 #else
12781 #define PREEMPT_LOCK_OFFSET 0
12782@@ -2674,12 +2801,16 @@ extern int __cond_resched_lock(spinlock_t *lock);
12783 __cond_resched_lock(lock); \
12784 })
12785
12786+#ifndef CONFIG_PREEMPT_RT_FULL
12787 extern int __cond_resched_softirq(void);
12788
12789 #define cond_resched_softirq() ({ \
12790 __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
12791 __cond_resched_softirq(); \
12792 })
12793+#else
12794+# define cond_resched_softirq() cond_resched()
12795+#endif
12796
12797 /*
12798 * Does a critical section need to be broken due to another
12799@@ -2751,6 +2882,26 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
12800
12801 #endif /* CONFIG_SMP */
12802
12803+static inline int __migrate_disabled(struct task_struct *p)
12804+{
12805+#ifdef CONFIG_PREEMPT_RT_FULL
12806+ return p->migrate_disable;
12807+#else
12808+ return 0;
12809+#endif
12810+}
12811+
12812+/* Future-safe accessor for struct task_struct's cpus_allowed. */
12813+static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
12814+{
12815+#ifdef CONFIG_PREEMPT_RT_FULL
12816+ if (p->migrate_disable)
12817+ return cpumask_of(task_cpu(p));
12818+#endif
12819+
12820+ return &p->cpus_allowed;
12821+}
12822+
12823 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
12824 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
12825
12826diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
12827index 600060e2..939ea1a 100644
12828--- a/include/linux/seqlock.h
12829+++ b/include/linux/seqlock.h
12830@@ -30,92 +30,12 @@
12831 #include <linux/preempt.h>
12832 #include <asm/processor.h>
12833
12834-typedef struct {
12835- unsigned sequence;
12836- spinlock_t lock;
12837-} seqlock_t;
12838-
12839-/*
12840- * These macros triggered gcc-3.x compile-time problems. We think these are
12841- * OK now. Be cautious.
12842- */
12843-#define __SEQLOCK_UNLOCKED(lockname) \
12844- { 0, __SPIN_LOCK_UNLOCKED(lockname) }
12845-
12846-#define seqlock_init(x) \
12847- do { \
12848- (x)->sequence = 0; \
12849- spin_lock_init(&(x)->lock); \
12850- } while (0)
12851-
12852-#define DEFINE_SEQLOCK(x) \
12853- seqlock_t x = __SEQLOCK_UNLOCKED(x)
12854-
12855-/* Lock out other writers and update the count.
12856- * Acts like a normal spin_lock/unlock.
12857- * Don't need preempt_disable() because that is in the spin_lock already.
12858- */
12859-static inline void write_seqlock(seqlock_t *sl)
12860-{
12861- spin_lock(&sl->lock);
12862- ++sl->sequence;
12863- smp_wmb();
12864-}
12865-
12866-static inline void write_sequnlock(seqlock_t *sl)
12867-{
12868- smp_wmb();
12869- sl->sequence++;
12870- spin_unlock(&sl->lock);
12871-}
12872-
12873-static inline int write_tryseqlock(seqlock_t *sl)
12874-{
12875- int ret = spin_trylock(&sl->lock);
12876-
12877- if (ret) {
12878- ++sl->sequence;
12879- smp_wmb();
12880- }
12881- return ret;
12882-}
12883-
12884-/* Start of read calculation -- fetch last complete writer token */
12885-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
12886-{
12887- unsigned ret;
12888-
12889-repeat:
12890- ret = ACCESS_ONCE(sl->sequence);
12891- if (unlikely(ret & 1)) {
12892- cpu_relax();
12893- goto repeat;
12894- }
12895- smp_rmb();
12896-
12897- return ret;
12898-}
12899-
12900-/*
12901- * Test if reader processed invalid data.
12902- *
12903- * If sequence value changed then writer changed data while in section.
12904- */
12905-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
12906-{
12907- smp_rmb();
12908-
12909- return unlikely(sl->sequence != start);
12910-}
12911-
12912-
12913 /*
12914 * Version using sequence counter only.
12915 * This can be used when code has its own mutex protecting the
12916 * updating starting before the write_seqcount_begin() and ending
12917 * after the write_seqcount_end().
12918 */
12919-
12920 typedef struct seqcount {
12921 unsigned sequence;
12922 } seqcount_t;
12923@@ -218,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
12924 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
12925 {
12926 smp_rmb();
12927-
12928 return __read_seqcount_retry(s, start);
12929 }
12930
12931@@ -227,18 +146,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
12932 * Sequence counter only version assumes that callers are using their
12933 * own mutexing.
12934 */
12935-static inline void write_seqcount_begin(seqcount_t *s)
12936+static inline void __write_seqcount_begin(seqcount_t *s)
12937 {
12938 s->sequence++;
12939 smp_wmb();
12940 }
12941
12942-static inline void write_seqcount_end(seqcount_t *s)
12943+static inline void write_seqcount_begin(seqcount_t *s)
12944+{
12945+ preempt_disable_rt();
12946+ __write_seqcount_begin(s);
12947+}
12948+
12949+static inline void __write_seqcount_end(seqcount_t *s)
12950 {
12951 smp_wmb();
12952 s->sequence++;
12953 }
12954
12955+static inline void write_seqcount_end(seqcount_t *s)
12956+{
12957+ __write_seqcount_end(s);
12958+ preempt_enable_rt();
12959+}
12960+
12961 /**
12962 * write_seqcount_barrier - invalidate in-progress read-side seq operations
12963 * @s: pointer to seqcount_t
12964@@ -252,31 +183,124 @@ static inline void write_seqcount_barrier(seqcount_t *s)
12965 s->sequence+=2;
12966 }
12967
12968+typedef struct {
12969+ struct seqcount seqcount;
12970+ spinlock_t lock;
12971+} seqlock_t;
12972+
12973+/*
12974+ * These macros triggered gcc-3.x compile-time problems. We think these are
12975+ * OK now. Be cautious.
12976+ */
12977+#define __SEQLOCK_UNLOCKED(lockname) \
12978+ { \
12979+ .seqcount = SEQCNT_ZERO, \
12980+ .lock = __SPIN_LOCK_UNLOCKED(lockname) \
12981+ }
12982+
12983+#define seqlock_init(x) \
12984+ do { \
12985+ seqcount_init(&(x)->seqcount); \
12986+ spin_lock_init(&(x)->lock); \
12987+ } while (0)
12988+
12989+#define DEFINE_SEQLOCK(x) \
12990+ seqlock_t x = __SEQLOCK_UNLOCKED(x)
12991+
12992+/*
12993+ * Read side functions for starting and finalizing a read side section.
12994+ */
12995+#ifndef CONFIG_PREEMPT_RT_FULL
12996+static inline unsigned read_seqbegin(const seqlock_t *sl)
12997+{
12998+ return read_seqcount_begin(&sl->seqcount);
12999+}
13000+#else
13001+/*
13002+ * Starvation safe read side for RT
13003+ */
13004+static inline unsigned read_seqbegin(seqlock_t *sl)
13005+{
13006+ unsigned ret;
13007+
13008+repeat:
13009+ ret = ACCESS_ONCE(sl->seqcount.sequence);
13010+ if (unlikely(ret & 1)) {
13011+ /*
13012+ * Take the lock and let the writer proceed (i.e. possibly
13013+ * boost it); otherwise we could loop here forever.
13014+ */
13015+ spin_lock(&sl->lock);
13016+ spin_unlock(&sl->lock);
13017+ goto repeat;
13018+ }
13019+ return ret;
13020+}
13021+#endif
13022+
13023+static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
13024+{
13025+ return read_seqcount_retry(&sl->seqcount, start);
13026+}
13027+
13028 /*
13029- * Possible sw/hw IRQ protected versions of the interfaces.
13030+ * Lock out other writers and update the count.
13031+ * Acts like a normal spin_lock/unlock.
13032+ * Don't need preempt_disable() because that is in the spin_lock already.
13033 */
13034+static inline void write_seqlock(seqlock_t *sl)
13035+{
13036+ spin_lock(&sl->lock);
13037+ __write_seqcount_begin(&sl->seqcount);
13038+}
13039+
13040+static inline void write_sequnlock(seqlock_t *sl)
13041+{
13042+ __write_seqcount_end(&sl->seqcount);
13043+ spin_unlock(&sl->lock);
13044+}
13045+
13046+static inline void write_seqlock_bh(seqlock_t *sl)
13047+{
13048+ spin_lock_bh(&sl->lock);
13049+ __write_seqcount_begin(&sl->seqcount);
13050+}
13051+
13052+static inline void write_sequnlock_bh(seqlock_t *sl)
13053+{
13054+ __write_seqcount_end(&sl->seqcount);
13055+ spin_unlock_bh(&sl->lock);
13056+}
13057+
13058+static inline void write_seqlock_irq(seqlock_t *sl)
13059+{
13060+ spin_lock_irq(&sl->lock);
13061+ __write_seqcount_begin(&sl->seqcount);
13062+}
13063+
13064+static inline void write_sequnlock_irq(seqlock_t *sl)
13065+{
13066+ __write_seqcount_end(&sl->seqcount);
13067+ spin_unlock_irq(&sl->lock);
13068+}
13069+
13070+static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
13071+{
13072+ unsigned long flags;
13073+
13074+ spin_lock_irqsave(&sl->lock, flags);
13075+ __write_seqcount_begin(&sl->seqcount);
13076+ return flags;
13077+}
13078+
13079 #define write_seqlock_irqsave(lock, flags) \
13080- do { local_irq_save(flags); write_seqlock(lock); } while (0)
13081-#define write_seqlock_irq(lock) \
13082- do { local_irq_disable(); write_seqlock(lock); } while (0)
13083-#define write_seqlock_bh(lock) \
13084- do { local_bh_disable(); write_seqlock(lock); } while (0)
13085-
13086-#define write_sequnlock_irqrestore(lock, flags) \
13087- do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
13088-#define write_sequnlock_irq(lock) \
13089- do { write_sequnlock(lock); local_irq_enable(); } while(0)
13090-#define write_sequnlock_bh(lock) \
13091- do { write_sequnlock(lock); local_bh_enable(); } while(0)
13092-
13093-#define read_seqbegin_irqsave(lock, flags) \
13094- ({ local_irq_save(flags); read_seqbegin(lock); })
13095-
13096-#define read_seqretry_irqrestore(lock, iv, flags) \
13097- ({ \
13098- int ret = read_seqretry(lock, iv); \
13099- local_irq_restore(flags); \
13100- ret; \
13101- })
13102+ do { flags = __write_seqlock_irqsave(lock); } while (0)
13103+
13104+static inline void
13105+write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
13106+{
13107+ __write_seqcount_end(&sl->seqcount);
13108+ spin_unlock_irqrestore(&sl->lock, flags);
13109+}
13110
13111 #endif /* __LINUX_SEQLOCK_H */
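
The rewritten seqlock above layers the old API over a seqcount_t plus spinlock, and on RT the reader briefly takes the lock so a preempted writer gets boosted instead of being spun on. The underlying sequence-count protocol is worth seeing in isolation; below is a compact userspace version in C11 atomics, following the usual published recipe (relaxed payload accesses plus acquire/release fences), not the kernel's implementation:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* read_begin/read_retry and write_enter/write_exit play the roles of
     * read_seqbegin/read_seqretry and write_seqlock/write_sequnlock above;
     * the payload uses relaxed atomics so the sketch has no data races. */
    static atomic_uint seq;
    static atomic_int data_a, data_b;

    static unsigned read_begin(void)
    {
        unsigned s;

        /* An odd count means a write is in progress: wait for it to end. */
        while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
            ;
        return s;
    }

    static int read_retry(unsigned start)
    {
        atomic_thread_fence(memory_order_acquire);
        return atomic_load_explicit(&seq, memory_order_relaxed) != start;
    }

    static void write_enter(void)
    {
        unsigned s = atomic_load_explicit(&seq, memory_order_relaxed);

        atomic_store_explicit(&seq, s + 1, memory_order_relaxed);  /* odd */
        atomic_thread_fence(memory_order_release);
    }

    static void write_exit(void)
    {
        unsigned s = atomic_load_explicit(&seq, memory_order_relaxed);

        atomic_store_explicit(&seq, s + 1, memory_order_release);  /* even */
    }

    static void *writer(void *arg)
    {
        for (int i = 1; i <= 100000; i++) {
            write_enter();
            atomic_store_explicit(&data_a, i, memory_order_relaxed);
            atomic_store_explicit(&data_b, 2 * i, memory_order_relaxed);
            write_exit();
        }
        return arg;
    }

    int main(void)
    {
        pthread_t t;
        unsigned s;
        int a, b;

        pthread_create(&t, NULL, writer, NULL);
        do {                            /* the classic read-side retry loop */
            s = read_begin();
            a = atomic_load_explicit(&data_a, memory_order_relaxed);
            b = atomic_load_explicit(&data_b, memory_order_relaxed);
        } while (read_retry(s));
        pthread_join(t, NULL);
        printf("snapshot a=%d b=%d, consistent: %s\n",
               a, b, b == 2 * a ? "yes" : "no");
        return 0;
    }

The RT-specific twist is in the read side above: instead of busy-waiting on an odd count, it takes and drops sl->lock so the writer it is waiting for can be priority-boosted.
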
13112diff --git a/include/linux/signal.h b/include/linux/signal.h
13113index 0a89ffc..8942895 100644
13114--- a/include/linux/signal.h
13115+++ b/include/linux/signal.h
13116@@ -226,6 +226,7 @@ static inline void init_sigpending(struct sigpending *sig)
13117 }
13118
13119 extern void flush_sigqueue(struct sigpending *queue);
13120+extern void flush_task_sigqueue(struct task_struct *tsk);
13121
13122 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
13123 static inline int valid_signal(unsigned long sig)
13124diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
13125index 9fe54b6..3da99c9b 100644
13126--- a/include/linux/skbuff.h
13127+++ b/include/linux/skbuff.h
13128@@ -132,6 +132,7 @@ struct sk_buff_head {
13129
13130 __u32 qlen;
13131 spinlock_t lock;
13132+ raw_spinlock_t raw_lock;
13133 };
13134
13135 struct sk_buff;
13136@@ -1008,6 +1009,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
13137 __skb_queue_head_init(list);
13138 }
13139
13140+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
13141+{
13142+ raw_spin_lock_init(&list->raw_lock);
13143+ __skb_queue_head_init(list);
13144+}
13145+
13146 static inline void skb_queue_head_init_class(struct sk_buff_head *list,
13147 struct lock_class_key *class)
13148 {
13149diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
13150index 9db4825..a58ad34 100644
13151--- a/include/linux/slub_def.h
13152+++ b/include/linux/slub_def.h
13153@@ -54,7 +54,7 @@ struct kmem_cache_cpu {
13154 };
13155
13156 struct kmem_cache_node {
13157- spinlock_t list_lock; /* Protect partial list and nr_partial */
13158+ raw_spinlock_t list_lock; /* Protect partial list and nr_partial */
13159 unsigned long nr_partial;
13160 struct list_head partial;
13161 #ifdef CONFIG_SLUB_DEBUG
13162diff --git a/include/linux/smp.h b/include/linux/smp.h
13163index dd6f06b..9e3255b 100644
13164--- a/include/linux/smp.h
13165+++ b/include/linux/smp.h
13166@@ -218,6 +218,9 @@ static inline void kick_all_cpus_sync(void) { }
13167 #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
13168 #define put_cpu() preempt_enable()
13169
13170+#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
13171+#define put_cpu_light() migrate_enable()
13172+
13173 /*
13174 * Callback to arch code if there's nosmp or maxcpus=0 on the
13175 * boot command line:
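
get_cpu_light()/put_cpu_light() above return the current CPU while only disabling migration rather than preemption. A userspace sketch of the pairing and of the GCC statement-expression trick that lets the macro have a side effect and still yield a value (sched_getcpu() and the __thread counter are stand-ins for smp_processor_id() and migrate_disable(); glibc assumed):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    static __thread int migrate_pin;    /* depth of "migration disabled" */

    #define get_cpu_light()  ({ migrate_pin++; sched_getcpu(); })
    #define put_cpu_light()  (migrate_pin--)

    int main(void)
    {
        int cpu = get_cpu_light();      /* pinned (by convention) from here */

        printf("running on cpu %d, pin depth %d\n", cpu, migrate_pin);
        put_cpu_light();                /* must balance every get_cpu_light() */
        return 0;
    }
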
13176diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
13177index 7d537ce..0c11a7c 100644
13178--- a/include/linux/spinlock.h
13179+++ b/include/linux/spinlock.h
13180@@ -254,7 +254,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
13181 #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
13182
13183 /* Include rwlock functions */
13184-#include <linux/rwlock.h>
13185+#ifdef CONFIG_PREEMPT_RT_FULL
13186+# include <linux/rwlock_rt.h>
13187+#else
13188+# include <linux/rwlock.h>
13189+#endif
13190
13191 /*
13192 * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
13193@@ -265,6 +269,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
13194 # include <linux/spinlock_api_up.h>
13195 #endif
13196
13197+#ifdef CONFIG_PREEMPT_RT_FULL
13198+# include <linux/spinlock_rt.h>
13199+#else /* PREEMPT_RT_FULL */
13200+
13201 /*
13202 * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
13203 */
13204@@ -394,4 +402,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
13205 #define atomic_dec_and_lock(atomic, lock) \
13206 __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
13207
13208+#endif /* !PREEMPT_RT_FULL */
13209+
13210 #endif /* __LINUX_SPINLOCK_H */
13211diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
13212index 51df117a..3f68f50 100644
13213--- a/include/linux/spinlock_api_smp.h
13214+++ b/include/linux/spinlock_api_smp.h
13215@@ -191,6 +191,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock)
13216 return 0;
13217 }
13218
13219-#include <linux/rwlock_api_smp.h>
13220+#ifndef CONFIG_PREEMPT_RT_FULL
13221+# include <linux/rwlock_api_smp.h>
13222+#endif
13223
13224 #endif /* __LINUX_SPINLOCK_API_SMP_H */
13225diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
13226new file mode 100644
13227index 0000000..0618387
13228--- /dev/null
13229+++ b/include/linux/spinlock_rt.h
13230@@ -0,0 +1,168 @@
13231+#ifndef __LINUX_SPINLOCK_RT_H
13232+#define __LINUX_SPINLOCK_RT_H
13233+
13234+#ifndef __LINUX_SPINLOCK_H
13235+#error Do not include directly. Use spinlock.h
13236+#endif
13237+
13238+#include <linux/bug.h>
13239+
13240+extern void
13241+__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
13242+
13243+#define spin_lock_init(slock) \
13244+do { \
13245+ static struct lock_class_key __key; \
13246+ \
13247+ rt_mutex_init(&(slock)->lock); \
13248+ __rt_spin_lock_init(slock, #slock, &__key); \
13249+} while (0)
13250+
13251+extern void __lockfunc rt_spin_lock(spinlock_t *lock);
13252+extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
13253+extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
13254+extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
13255+extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
13256+extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
13257+extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
13258+extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
13259+extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
13260+
13261+/*
13262+ * lockdep-less calls, for derived types like rwlock:
13263+ * (for trylock they can use rt_mutex_trylock() directly).
13264+ */
13265+extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
13266+extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
13267+
13268+#define spin_lock_local(lock) rt_spin_lock(lock)
13269+#define spin_unlock_local(lock) rt_spin_unlock(lock)
13270+
13271+#define spin_lock(lock) \
13272+ do { \
13273+ migrate_disable(); \
13274+ rt_spin_lock(lock); \
13275+ } while (0)
13276+
13277+#define spin_lock_bh(lock) \
13278+ do { \
13279+ local_bh_disable(); \
13280+ migrate_disable(); \
13281+ rt_spin_lock(lock); \
13282+ } while (0)
13283+
13284+#define spin_lock_irq(lock) spin_lock(lock)
13285+
13286+#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
13287+
13288+#define spin_trylock(lock) \
13289+({ \
13290+ int __locked; \
13291+ migrate_disable(); \
13292+ __locked = spin_do_trylock(lock); \
13293+ if (!__locked) \
13294+ migrate_enable(); \
13295+ __locked; \
13296+})
13297+
13298+#ifdef CONFIG_LOCKDEP
13299+# define spin_lock_nested(lock, subclass) \
13300+ do { \
13301+ migrate_disable(); \
13302+ rt_spin_lock_nested(lock, subclass); \
13303+ } while (0)
13304+
13305+# define spin_lock_irqsave_nested(lock, flags, subclass) \
13306+ do { \
13307+ typecheck(unsigned long, flags); \
13308+ flags = 0; \
13309+ migrate_disable(); \
13310+ rt_spin_lock_nested(lock, subclass); \
13311+ } while (0)
13312+#else
13313+# define spin_lock_nested(lock, subclass) spin_lock(lock)
13314+
13315+# define spin_lock_irqsave_nested(lock, flags, subclass) \
13316+ do { \
13317+ typecheck(unsigned long, flags); \
13318+ flags = 0; \
13319+ spin_lock(lock); \
13320+ } while (0)
13321+#endif
13322+
13323+#define spin_lock_irqsave(lock, flags) \
13324+ do { \
13325+ typecheck(unsigned long, flags); \
13326+ flags = 0; \
13327+ spin_lock(lock); \
13328+ } while (0)
13329+
13330+static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
13331+{
13332+ unsigned long flags = 0;
13333+#ifdef CONFIG_TRACE_IRQFLAGS
13334+ flags = rt_spin_lock_trace_flags(lock);
13335+#else
13336+ spin_lock(lock); /* lock_local */
13337+#endif
13338+ return flags;
13339+}
13340+
13341+/* FIXME: we need rt_spin_lock_nest_lock */
13342+#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
13343+
13344+#define spin_unlock(lock) \
13345+ do { \
13346+ rt_spin_unlock(lock); \
13347+ migrate_enable(); \
13348+ } while (0)
13349+
13350+#define spin_unlock_bh(lock) \
13351+ do { \
13352+ rt_spin_unlock(lock); \
13353+ migrate_enable(); \
13354+ local_bh_enable(); \
13355+ } while (0)
13356+
13357+#define spin_unlock_irq(lock) spin_unlock(lock)
13358+
13359+#define spin_unlock_irqrestore(lock, flags) \
13360+ do { \
13361+ typecheck(unsigned long, flags); \
13362+ (void) flags; \
13363+ spin_unlock(lock); \
13364+ } while (0)
13365+
13366+#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
13367+#define spin_trylock_irq(lock) spin_trylock(lock)
13368+
13369+#define spin_trylock_irqsave(lock, flags) \
13370+ rt_spin_trylock_irqsave(lock, &(flags))
13371+
13372+#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
13373+
13374+#ifdef CONFIG_GENERIC_LOCKBREAK
13375+# define spin_is_contended(lock) ((lock)->break_lock)
13376+#else
13377+# define spin_is_contended(lock) (((void)(lock), 0))
13378+#endif
13379+
13380+static inline int spin_can_lock(spinlock_t *lock)
13381+{
13382+ return !rt_mutex_is_locked(&lock->lock);
13383+}
13384+
13385+static inline int spin_is_locked(spinlock_t *lock)
13386+{
13387+ return rt_mutex_is_locked(&lock->lock);
13388+}
13389+
13390+static inline void assert_spin_locked(spinlock_t *lock)
13391+{
13392+ BUG_ON(!spin_is_locked(lock));
13393+}
13394+
13395+#define atomic_dec_and_lock(atomic, lock) \
13396+ atomic_dec_and_spin_lock(atomic, lock)
13397+
13398+#endif
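
Note how spin_trylock() above must keep migrate_disable()/migrate_enable() balanced: migration is disabled before the attempt and re-enabled immediately if the lock was not taken. A userspace sketch of that discipline, with pthread_mutex_trylock() and a thread-local pin counter standing in for the rt_mutex and the migrate counter:

    #include <pthread.h>
    #include <stdio.h>

    static __thread int migrate_pin;    /* stand-in for migrate_disable() depth */

    static int rt_spin_trylock(pthread_mutex_t *m)
    {
        int locked;

        migrate_pin++;                  /* "migrate_disable()" */
        locked = (pthread_mutex_trylock(m) == 0);
        if (!locked)
            migrate_pin--;              /* failed: undo it immediately */
        return locked;
    }

    static void rt_spin_unlock(pthread_mutex_t *m)
    {
        pthread_mutex_unlock(m);
        migrate_pin--;                  /* "migrate_enable()" */
    }

    int main(void)
    {
        pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

        if (rt_spin_trylock(&m)) {
            printf("locked, pin depth %d\n", migrate_pin);      /* 1 */
            rt_spin_unlock(&m);
        }
        printf("pin depth after unlock %d\n", migrate_pin);     /* 0 */
        return 0;
    }

The same shape appears in the macro above: migrate_disable() first, spin_do_trylock(), and migrate_enable() only on the failure path.
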
13399diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
13400index 73548eb..10bac71 100644
13401--- a/include/linux/spinlock_types.h
13402+++ b/include/linux/spinlock_types.h
13403@@ -9,80 +9,15 @@
13404 * Released under the General Public License (GPL).
13405 */
13406
13407-#if defined(CONFIG_SMP)
13408-# include <asm/spinlock_types.h>
13409-#else
13410-# include <linux/spinlock_types_up.h>
13411-#endif
13412-
13413-#include <linux/lockdep.h>
13414-
13415-typedef struct raw_spinlock {
13416- arch_spinlock_t raw_lock;
13417-#ifdef CONFIG_GENERIC_LOCKBREAK
13418- unsigned int break_lock;
13419-#endif
13420-#ifdef CONFIG_DEBUG_SPINLOCK
13421- unsigned int magic, owner_cpu;
13422- void *owner;
13423-#endif
13424-#ifdef CONFIG_DEBUG_LOCK_ALLOC
13425- struct lockdep_map dep_map;
13426-#endif
13427-} raw_spinlock_t;
13428-
13429-#define SPINLOCK_MAGIC 0xdead4ead
13430-
13431-#define SPINLOCK_OWNER_INIT ((void *)-1L)
13432-
13433-#ifdef CONFIG_DEBUG_LOCK_ALLOC
13434-# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
13435-#else
13436-# define SPIN_DEP_MAP_INIT(lockname)
13437-#endif
13438+#include <linux/spinlock_types_raw.h>
13439
13440-#ifdef CONFIG_DEBUG_SPINLOCK
13441-# define SPIN_DEBUG_INIT(lockname) \
13442- .magic = SPINLOCK_MAGIC, \
13443- .owner_cpu = -1, \
13444- .owner = SPINLOCK_OWNER_INIT,
13445+#ifndef CONFIG_PREEMPT_RT_FULL
13446+# include <linux/spinlock_types_nort.h>
13447+# include <linux/rwlock_types.h>
13448 #else
13449-# define SPIN_DEBUG_INIT(lockname)
13450+# include <linux/rtmutex.h>
13451+# include <linux/spinlock_types_rt.h>
13452+# include <linux/rwlock_types_rt.h>
13453 #endif
13454
13455-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
13456- { \
13457- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
13458- SPIN_DEBUG_INIT(lockname) \
13459- SPIN_DEP_MAP_INIT(lockname) }
13460-
13461-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
13462- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
13463-
13464-#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
13465-
13466-typedef struct spinlock {
13467- union {
13468- struct raw_spinlock rlock;
13469-
13470-#ifdef CONFIG_DEBUG_LOCK_ALLOC
13471-# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
13472- struct {
13473- u8 __padding[LOCK_PADSIZE];
13474- struct lockdep_map dep_map;
13475- };
13476-#endif
13477- };
13478-} spinlock_t;
13479-
13480-#define __SPIN_LOCK_INITIALIZER(lockname) \
13481- { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
13482-
13483-#define __SPIN_LOCK_UNLOCKED(lockname) \
13484- (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
13485-
13486-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
13487-
13488-#include <linux/rwlock_types.h>
13489-
13490 #endif /* __LINUX_SPINLOCK_TYPES_H */
13491diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h
13492new file mode 100644
13493index 0000000..f1dac1f
13494--- /dev/null
13495+++ b/include/linux/spinlock_types_nort.h
13496@@ -0,0 +1,33 @@
13497+#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
13498+#define __LINUX_SPINLOCK_TYPES_NORT_H
13499+
13500+#ifndef __LINUX_SPINLOCK_TYPES_H
13501+#error "Do not include directly. Include spinlock_types.h instead"
13502+#endif
13503+
13504+/*
13505+ * The non RT version maps spinlocks to raw_spinlocks
13506+ */
13507+typedef struct spinlock {
13508+ union {
13509+ struct raw_spinlock rlock;
13510+
13511+#ifdef CONFIG_DEBUG_LOCK_ALLOC
13512+# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
13513+ struct {
13514+ u8 __padding[LOCK_PADSIZE];
13515+ struct lockdep_map dep_map;
13516+ };
13517+#endif
13518+ };
13519+} spinlock_t;
13520+
13521+#define __SPIN_LOCK_INITIALIZER(lockname) \
13522+ { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
13523+
13524+#define __SPIN_LOCK_UNLOCKED(lockname) \
13525+ (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
13526+
13527+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
13528+
13529+#endif
13530diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
13531new file mode 100644
13532index 0000000..edffc4d
13533--- /dev/null
13534+++ b/include/linux/spinlock_types_raw.h
13535@@ -0,0 +1,56 @@
13536+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
13537+#define __LINUX_SPINLOCK_TYPES_RAW_H
13538+
13539+#if defined(CONFIG_SMP)
13540+# include <asm/spinlock_types.h>
13541+#else
13542+# include <linux/spinlock_types_up.h>
13543+#endif
13544+
13545+#include <linux/lockdep.h>
13546+
13547+typedef struct raw_spinlock {
13548+ arch_spinlock_t raw_lock;
13549+#ifdef CONFIG_GENERIC_LOCKBREAK
13550+ unsigned int break_lock;
13551+#endif
13552+#ifdef CONFIG_DEBUG_SPINLOCK
13553+ unsigned int magic, owner_cpu;
13554+ void *owner;
13555+#endif
13556+#ifdef CONFIG_DEBUG_LOCK_ALLOC
13557+ struct lockdep_map dep_map;
13558+#endif
13559+} raw_spinlock_t;
13560+
13561+#define SPINLOCK_MAGIC 0xdead4ead
13562+
13563+#define SPINLOCK_OWNER_INIT ((void *)-1L)
13564+
13565+#ifdef CONFIG_DEBUG_LOCK_ALLOC
13566+# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
13567+#else
13568+# define SPIN_DEP_MAP_INIT(lockname)
13569+#endif
13570+
13571+#ifdef CONFIG_DEBUG_SPINLOCK
13572+# define SPIN_DEBUG_INIT(lockname) \
13573+ .magic = SPINLOCK_MAGIC, \
13574+ .owner_cpu = -1, \
13575+ .owner = SPINLOCK_OWNER_INIT,
13576+#else
13577+# define SPIN_DEBUG_INIT(lockname)
13578+#endif
13579+
13580+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
13581+ { \
13582+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
13583+ SPIN_DEBUG_INIT(lockname) \
13584+ SPIN_DEP_MAP_INIT(lockname) }
13585+
13586+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
13587+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
13588+
13589+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
13590+
13591+#endif
13592diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h
13593new file mode 100644
13594index 0000000..9fd4319
13595--- /dev/null
13596+++ b/include/linux/spinlock_types_rt.h
13597@@ -0,0 +1,51 @@
13598+#ifndef __LINUX_SPINLOCK_TYPES_RT_H
13599+#define __LINUX_SPINLOCK_TYPES_RT_H
13600+
13601+#ifndef __LINUX_SPINLOCK_TYPES_H
13602+#error "Do not include directly. Include spinlock_types.h instead"
13603+#endif
13604+
13605+#include <linux/cache.h>
13606+
13607+/*
13608+ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
13609+ */
13610+typedef struct spinlock {
13611+ struct rt_mutex lock;
13612+ unsigned int break_lock;
13613+#ifdef CONFIG_DEBUG_LOCK_ALLOC
13614+ struct lockdep_map dep_map;
13615+#endif
13616+} spinlock_t;
13617+
13618+#ifdef CONFIG_DEBUG_RT_MUTEXES
13619+# define __RT_SPIN_INITIALIZER(name) \
13620+ { \
13621+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
13622+ .save_state = 1, \
13623+ .file = __FILE__, \
13624+ .line = __LINE__ , \
13625+ }
13626+#else
13627+# define __RT_SPIN_INITIALIZER(name) \
13628+ { \
13629+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
13630+ .save_state = 1, \
13631+ }
13632+#endif
13633+
13634+/*
13635+.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
13636+*/
13637+
13638+#define __SPIN_LOCK_UNLOCKED(name) \
13639+ { .lock = __RT_SPIN_INITIALIZER(name.lock), \
13640+ SPIN_DEP_MAP_INIT(name) }
13641+
13642+#define __DEFINE_SPINLOCK(name) \
13643+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
13644+
13645+#define DEFINE_SPINLOCK(name) \
13646+ spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
13647+
13648+#endif
13649diff --git a/include/linux/srcu.h b/include/linux/srcu.h
13650index 6eb691b..fe9efd4 100644
13651--- a/include/linux/srcu.h
13652+++ b/include/linux/srcu.h
13653@@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp);
13654
13655 void process_srcu(struct work_struct *work);
13656
13657-#define __SRCU_STRUCT_INIT(name) \
13658+#define __SRCU_STRUCT_INIT(name, pcpu_name) \
13659 { \
13660 .completed = -300, \
13661- .per_cpu_ref = &name##_srcu_array, \
13662+ .per_cpu_ref = &pcpu_name, \
13663 .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
13664 .running = false, \
13665 .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
13666@@ -102,13 +102,13 @@ void process_srcu(struct work_struct *work);
13667 * define and init a srcu struct at build time.
13668 * don't call init_srcu_struct() nor cleanup_srcu_struct() on it.
13669 */
13670-#define DEFINE_SRCU(name) \
13671+#define _DEFINE_SRCU(name, mod) \
13672 static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
13673- struct srcu_struct name = __SRCU_STRUCT_INIT(name);
13674+ mod struct srcu_struct name = \
13675+ __SRCU_STRUCT_INIT(name, name##_srcu_array);
13676
13677-#define DEFINE_STATIC_SRCU(name) \
13678- static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
13679- static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
13680+#define DEFINE_SRCU(name) _DEFINE_SRCU(name, )
13681+#define DEFINE_STATIC_SRCU(name) _DEFINE_SRCU(name, static)
13682
13683 /**
13684 * call_srcu() - Queue a callback for invocation after an SRCU grace period
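
The srcu.h hunk above folds DEFINE_SRCU() and DEFINE_STATIC_SRCU() into one _DEFINE_SRCU(name, mod) helper that takes the storage-class keyword (empty or `static`) as a macro argument. The same preprocessor trick in miniature, with made-up counter names:

    #include <stdio.h>

    /* 'mod' is pasted in front of the definition: empty for external
     * linkage, 'static' for a file-local definition. */
    #define _DEFINE_COUNTER(name, mod)   mod int name = 0;
    #define DEFINE_COUNTER(name)         _DEFINE_COUNTER(name, )
    #define DEFINE_STATIC_COUNTER(name)  _DEFINE_COUNTER(name, static)

    DEFINE_COUNTER(global_hits)          /* ordinary external definition */
    DEFINE_STATIC_COUNTER(local_hits)    /* file-local, like DEFINE_STATIC_SRCU */

    int main(void)
    {
        global_hits++;
        local_hits += 2;
        printf("global=%d local=%d\n", global_hits, local_hits);
        return 0;
    }
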
13685diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
13686index 14a8ff2..b15655f 100644
13687--- a/include/linux/sysctl.h
13688+++ b/include/linux/sysctl.h
13689@@ -25,6 +25,7 @@
13690 #include <linux/rcupdate.h>
13691 #include <linux/wait.h>
13692 #include <linux/rbtree.h>
13693+#include <linux/atomic.h>
13694 #include <uapi/linux/sysctl.h>
13695
13696 /* For the /proc/sys support */
13697diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
13698index e1d558e..2adf9c3 100644
13699--- a/include/linux/timekeeper_internal.h
13700+++ b/include/linux/timekeeper_internal.h
13701@@ -20,6 +20,8 @@ struct timekeeper {
13702 u32 shift;
13703 /* Number of clock cycles in one NTP interval. */
13704 cycle_t cycle_interval;
13705+ /* Last cycle value (also stored in clock->cycle_last) */
13706+ cycle_t cycle_last;
13707 /* Number of clock shifted nano seconds in one NTP interval. */
13708 u64 xtime_interval;
13709 /* shifted nano seconds left over when rounding cycle_interval */
13710@@ -62,8 +64,6 @@ struct timekeeper {
13711 ktime_t offs_boot;
13712 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
13713 struct timespec raw_time;
13714- /* Seqlock for all timekeeper values */
13715- seqlock_t lock;
13716 };
13717
13718 static inline struct timespec tk_xtime(struct timekeeper *tk)
13719diff --git a/include/linux/timer.h b/include/linux/timer.h
13720index 8c5a197..5fcd72c 100644
13721--- a/include/linux/timer.h
13722+++ b/include/linux/timer.h
13723@@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer);
13724
13725 extern int try_to_del_timer_sync(struct timer_list *timer);
13726
13727-#ifdef CONFIG_SMP
13728+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
13729 extern int del_timer_sync(struct timer_list *timer);
13730 #else
13731 # define del_timer_sync(t) del_timer(t)
13732diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
13733index 5ca0951..44b3751 100644
13734--- a/include/linux/uaccess.h
13735+++ b/include/linux/uaccess.h
13736@@ -6,38 +6,37 @@
13737
13738 /*
13739 * These routines enable/disable the pagefault handler in that
13740- * it will not take any locks and go straight to the fixup table.
13741- *
13742- * They have great resemblance to the preempt_disable/enable calls
13743- * and in fact they are identical; this is because currently there is
13744- * no other way to make the pagefault handlers do this. So we do
13745- * disable preemption but we don't necessarily care about that.
13746+ * it will not take any MM locks and go straight to the fixup table.
13747 */
13748-static inline void pagefault_disable(void)
13749+static inline void raw_pagefault_disable(void)
13750 {
13751 inc_preempt_count();
13752- /*
13753- * make sure to have issued the store before a pagefault
13754- * can hit.
13755- */
13756 barrier();
13757 }
13758
13759-static inline void pagefault_enable(void)
13760+static inline void raw_pagefault_enable(void)
13761 {
13762- /*
13763- * make sure to issue those last loads/stores before enabling
13764- * the pagefault handler again.
13765- */
13766 barrier();
13767 dec_preempt_count();
13768- /*
13769- * make sure we do..
13770- */
13771 barrier();
13772 preempt_check_resched();
13773 }
13774
13775+#ifndef CONFIG_PREEMPT_RT_FULL
13776+static inline void pagefault_disable(void)
13777+{
13778+ raw_pagefault_disable();
13779+}
13780+
13781+static inline void pagefault_enable(void)
13782+{
13783+ raw_pagefault_enable();
13784+}
13785+#else
13786+extern void pagefault_disable(void);
13787+extern void pagefault_enable(void);
13788+#endif
13789+
13790 #ifndef ARCH_HAS_NOCACHE_UACCESS
13791
13792 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
13793@@ -77,9 +76,9 @@ static inline unsigned long __copy_from_user_nocache(void *to,
13794 mm_segment_t old_fs = get_fs(); \
13795 \
13796 set_fs(KERNEL_DS); \
13797- pagefault_disable(); \
13798+ raw_pagefault_disable(); \
13799 ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
13800- pagefault_enable(); \
13801+ raw_pagefault_enable(); \
13802 set_fs(old_fs); \
13803 ret; \
13804 })
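
raw_pagefault_disable()/raw_pagefault_enable() above are just a nesting counter bumped and dropped around compiler barriers (the kernel uses the preempt count for this). A userspace sketch of that nesting-counter pattern, with a per-thread depth variable as the stand-in; illustration only, not kernel code:

    #include <stdio.h>

    static __thread int pf_disable_depth;

    #define barrier() __asm__ __volatile__("" ::: "memory")

    static inline void pagefault_disable(void)
    {
        pf_disable_depth++;
        barrier();          /* order the bump before any "faulting" access */
    }

    static inline void pagefault_enable(void)
    {
        barrier();          /* finish accesses before dropping the count */
        pf_disable_depth--;
    }

    static inline int pagefault_disabled(void)
    {
        return pf_disable_depth != 0;
    }

    int main(void)
    {
        pagefault_disable();
        pagefault_disable();        /* nests cleanly */
        printf("disabled=%d depth=%d\n", pagefault_disabled(), pf_disable_depth);
        pagefault_enable();
        pagefault_enable();
        printf("disabled=%d\n", pagefault_disabled());
        return 0;
    }
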
13805diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
13806index 4f628a6..ef03e33 100644
13807--- a/include/linux/uprobes.h
13808+++ b/include/linux/uprobes.h
13809@@ -26,6 +26,7 @@
13810
13811 #include <linux/errno.h>
13812 #include <linux/rbtree.h>
13813+#include <linux/wait.h>
13814
13815 struct vm_area_struct;
13816 struct mm_struct;
13817diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
13818index a13291f..839806b 100644
13819--- a/include/linux/vmstat.h
13820+++ b/include/linux/vmstat.h
13821@@ -29,7 +29,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
13822
13823 static inline void __count_vm_event(enum vm_event_item item)
13824 {
13825+ preempt_disable_rt();
13826 __this_cpu_inc(vm_event_states.event[item]);
13827+ preempt_enable_rt();
13828 }
13829
13830 static inline void count_vm_event(enum vm_event_item item)
13831@@ -39,7 +41,9 @@ static inline void count_vm_event(enum vm_event_item item)
13832
13833 static inline void __count_vm_events(enum vm_event_item item, long delta)
13834 {
13835+ preempt_disable_rt();
13836 __this_cpu_add(vm_event_states.event[item], delta);
13837+ preempt_enable_rt();
13838 }
13839
13840 static inline void count_vm_events(enum vm_event_item item, long delta)
13841diff --git a/include/linux/wait-simple.h b/include/linux/wait-simple.h
13842new file mode 100644
13843index 0000000..4efba4d
13844--- /dev/null
13845+++ b/include/linux/wait-simple.h
13846@@ -0,0 +1,199 @@
13847+#ifndef _LINUX_WAIT_SIMPLE_H
13848+#define _LINUX_WAIT_SIMPLE_H
13849+
13850+#include <linux/spinlock.h>
13851+#include <linux/list.h>
13852+
13853+#include <asm/current.h>
13854+
13855+struct swaiter {
13856+ struct task_struct *task;
13857+ struct list_head node;
13858+};
13859+
13860+#define DEFINE_SWAITER(name) \
13861+ struct swaiter name = { \
13862+ .task = current, \
13863+ .node = LIST_HEAD_INIT((name).node), \
13864+ }
13865+
13866+struct swait_head {
13867+ raw_spinlock_t lock;
13868+ struct list_head list;
13869+};
13870+
13871+#define SWAIT_HEAD_INITIALIZER(name) { \
13872+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
13873+ .list = LIST_HEAD_INIT((name).list), \
13874+ }
13875+
13876+#define DEFINE_SWAIT_HEAD(name) \
13877+ struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
13878+
13879+extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
13880+
13881+#define init_swait_head(swh) \
13882+ do { \
13883+ static struct lock_class_key __key; \
13884+ \
13885+ __init_swait_head((swh), &__key); \
13886+ } while (0)
13887+
13888+/*
13889+ * Waiter functions
13890+ */
13891+extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w);
13892+extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
13893+extern void swait_finish_locked(struct swait_head *head, struct swaiter *w);
13894+extern void swait_finish(struct swait_head *head, struct swaiter *w);
13895+
13896+/*
13897+ * Wakeup functions
13898+ */
13899+extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num);
13900+extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num);
13901+
13902+#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1)
13903+#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1)
13904+#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0)
13905+#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0)
13906+
13907+/*
13908+ * Event API
13909+ */
13910+#define __swait_event(wq, condition) \
13911+do { \
13912+ DEFINE_SWAITER(__wait); \
13913+ \
13914+ for (;;) { \
13915+ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
13916+ if (condition) \
13917+ break; \
13918+ schedule(); \
13919+ } \
13920+ swait_finish(&wq, &__wait); \
13921+} while (0)
13922+
13923+/**
13924+ * swait_event - sleep until a condition gets true
13925+ * @wq: the waitqueue to wait on
13926+ * @condition: a C expression for the event to wait for
13927+ *
13928+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
13929+ * @condition evaluates to true. The @condition is checked each time
13930+ * the waitqueue @wq is woken up.
13931+ *
13932+ * wake_up() has to be called after changing any variable that could
13933+ * change the result of the wait condition.
13934+ */
13935+#define swait_event(wq, condition) \
13936+do { \
13937+ if (condition) \
13938+ break; \
13939+ __swait_event(wq, condition); \
13940+} while (0)
13941+
13942+#define __swait_event_interruptible(wq, condition, ret) \
13943+do { \
13944+ DEFINE_SWAITER(__wait); \
13945+ \
13946+ for (;;) { \
13947+ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
13948+ if (condition) \
13949+ break; \
13950+ if (signal_pending(current)) { \
13951+ ret = -ERESTARTSYS; \
13952+ break; \
13953+ } \
13954+ schedule(); \
13955+ } \
13956+ swait_finish(&wq, &__wait); \
13957+} while (0)
13958+
13959+#define __swait_event_interruptible_timeout(wq, condition, ret) \
13960+do { \
13961+ DEFINE_SWAITER(__wait); \
13962+ \
13963+ for (;;) { \
13964+ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
13965+ if (condition) \
13966+ break; \
13967+ if (signal_pending(current)) { \
13968+ ret = -ERESTARTSYS; \
13969+ break; \
13970+ } \
13971+ ret = schedule_timeout(ret); \
13972+ if (!ret) \
13973+ break; \
13974+ } \
13975+ swait_finish(&wq, &__wait); \
13976+} while (0)
13977+
13978+/**
13979+ * swait_event_interruptible - sleep until a condition gets true
13980+ * @wq: the waitqueue to wait on
13981+ * @condition: a C expression for the event to wait for
13982+ *
13983+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
13984+ * @condition evaluates to true. The @condition is checked each time
13985+ * the waitqueue @wq is woken up.
13986+ *
13987+ * wake_up() has to be called after changing any variable that could
13988+ * change the result of the wait condition.
13989+ */
13990+#define swait_event_interruptible(wq, condition) \
13991+({ \
13992+ int __ret = 0; \
13993+ if (!(condition)) \
13994+ __swait_event_interruptible(wq, condition, __ret); \
13995+ __ret; \
13996+})
13997+
13998+#define swait_event_interruptible_timeout(wq, condition, timeout) \
13999+({ \
14000+ int __ret = timeout; \
14001+ if (!(condition)) \
14002+ __swait_event_interruptible_timeout(wq, condition, __ret); \
14003+ __ret; \
14004+})
14005+
14006+#define __swait_event_timeout(wq, condition, ret) \
14007+do { \
14008+ DEFINE_SWAITER(__wait); \
14009+ \
14010+ for (;;) { \
14011+ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
14012+ if (condition) \
14013+ break; \
14014+ ret = schedule_timeout(ret); \
14015+ if (!ret) \
14016+ break; \
14017+ } \
14018+ swait_finish(&wq, &__wait); \
14019+} while (0)
14020+
14021+/**
14022+ * swait_event_timeout - sleep until a condition gets true or a timeout elapses
14023+ * @wq: the waitqueue to wait on
14024+ * @condition: a C expression for the event to wait for
14025+ * @timeout: timeout, in jiffies
14026+ *
14027+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
14028+ * @condition evaluates to true. The @condition is checked each time
14029+ * the waitqueue @wq is woken up.
14030+ *
14031+ * wake_up() has to be called after changing any variable that could
14032+ * change the result of the wait condition.
14033+ *
14034+ * The function returns 0 if the @timeout elapsed, and the remaining
14035+ * jiffies if the condition evaluated to true before the timeout elapsed.
14036+ */
14037+#define swait_event_timeout(wq, condition, timeout) \
14038+({ \
14039+ long __ret = timeout; \
14040+ if (!(condition)) \
14041+ __swait_event_timeout(wq, condition, __ret); \
14042+ __ret; \
14043+})
14044+
14045+#endif
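
As a rough illustration of how a consumer might use the simple-wait API above: the event macros are the ones shown in this hunk, while the head type, DEFINE_SWAIT_HEAD() and swait_wake_all() are assumed to come from the same wait-simple.h elsewhere in this patch. A sketch, not part of the patch itself:

    #include <linux/wait-simple.h>
    #include <linux/errno.h>

    static DEFINE_SWAIT_HEAD(done_wq);      /* assumed init helper */
    static int done;

    /* Waiter side: sleeps interruptibly until 'done' becomes true. */
    static int wait_for_done(void)
    {
            return swait_event_interruptible(done_wq, done);
    }

    /* Producer side: per the comment above, wake only after the
     * condition-changing store. */
    static void signal_done(void)
    {
            done = 1;
            swait_wake_all(&done_wq);       /* assumed wake helper */
    }
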
14046diff --git a/include/net/dst.h b/include/net/dst.h
14047index b3ebe17..446d7b1 100644
14048--- a/include/net/dst.h
14049+++ b/include/net/dst.h
14050@@ -392,7 +392,7 @@ static inline void dst_confirm(struct dst_entry *dst)
14051 static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
14052 struct sk_buff *skb)
14053 {
14054- const struct hh_cache *hh;
14055+ struct hh_cache *hh;
14056
14057 if (dst->pending_confirm) {
14058 unsigned long now = jiffies;
14059diff --git a/include/net/neighbour.h b/include/net/neighbour.h
14060index 0dab173..f28b70c 100644
14061--- a/include/net/neighbour.h
14062+++ b/include/net/neighbour.h
14063@@ -334,7 +334,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
14064 }
14065 #endif
14066
14067-static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
14068+static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
14069 {
14070 unsigned int seq;
14071 int hh_len;
14072@@ -389,7 +389,7 @@ struct neighbour_cb {
14073
14074 #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
14075
14076-static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
14077+static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
14078 const struct net_device *dev)
14079 {
14080 unsigned int seq;
14081diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
14082index 2ae2b83..17e815d 100644
14083--- a/include/net/netns/ipv4.h
14084+++ b/include/net/netns/ipv4.h
14085@@ -56,6 +56,7 @@ struct netns_ipv4 {
14086
14087 int sysctl_icmp_echo_ignore_all;
14088 int sysctl_icmp_echo_ignore_broadcasts;
14089+ int sysctl_icmp_echo_sysrq;
14090 int sysctl_icmp_ignore_bogus_error_responses;
14091 int sysctl_icmp_ratelimit;
14092 int sysctl_icmp_ratemask;
14093diff --git a/include/net/tcp.h b/include/net/tcp.h
14094index aed42c7..4da2167 100644
14095--- a/include/net/tcp.h
14096+++ b/include/net/tcp.h
14097@@ -1045,6 +1045,7 @@ static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
14098 if (sysctl_tcp_low_latency || !tp->ucopy.task)
14099 return false;
14100
14101+ skb_dst_force(skb);
14102 __skb_queue_tail(&tp->ucopy.prequeue, skb);
14103 tp->ucopy.memory += skb->truesize;
14104 if (tp->ucopy.memory > sk->sk_rcvbuf) {
14105diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h
14106new file mode 100644
14107index 0000000..28646db
14108--- /dev/null
14109+++ b/include/trace/events/hist.h
14110@@ -0,0 +1,69 @@
14111+#undef TRACE_SYSTEM
14112+#define TRACE_SYSTEM hist
14113+
14114+#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
14115+#define _TRACE_HIST_H
14116+
14117+#include "latency_hist.h"
14118+#include <linux/tracepoint.h>
14119+
14120+#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
14121+#define trace_preemptirqsoff_hist(a,b)
14122+#else
14123+TRACE_EVENT(preemptirqsoff_hist,
14124+
14125+ TP_PROTO(int reason, int starthist),
14126+
14127+ TP_ARGS(reason, starthist),
14128+
14129+ TP_STRUCT__entry(
14130+ __field(int, reason )
14131+ __field(int, starthist )
14132+ ),
14133+
14134+ TP_fast_assign(
14135+ __entry->reason = reason;
14136+ __entry->starthist = starthist;
14137+ ),
14138+
14139+ TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
14140+ __entry->starthist ? "start" : "stop")
14141+);
14142+#endif
14143+
14144+#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
14145+#define trace_hrtimer_interrupt(a,b,c,d)
14146+#else
14147+TRACE_EVENT(hrtimer_interrupt,
14148+
14149+ TP_PROTO(int cpu, long long offset, struct task_struct *curr, struct task_struct *task),
14150+
14151+ TP_ARGS(cpu, offset, curr, task),
14152+
14153+ TP_STRUCT__entry(
14154+ __field(int, cpu )
14155+ __field(long long, offset )
14156+ __array(char, ccomm, TASK_COMM_LEN)
14157+ __field(int, cprio )
14158+ __array(char, tcomm, TASK_COMM_LEN)
14159+ __field(int, tprio )
14160+ ),
14161+
14162+ TP_fast_assign(
14163+ __entry->cpu = cpu;
14164+ __entry->offset = offset;
14165+ memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
14166+ __entry->cprio = curr->prio;
14167+ memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", task != NULL ? TASK_COMM_LEN : 7);
14168+ __entry->tprio = task != NULL ? task->prio : -1;
14169+ ),
14170+
14171+ TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
14172+ __entry->cpu, __entry->offset, __entry->ccomm, __entry->cprio, __entry->tcomm, __entry->tprio)
14173+);
14174+#endif
14175+
14176+#endif /* _TRACE_HIST_H */
14177+
14178+/* This part must be outside protection */
14179+#include <trace/define_trace.h>
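
A minimal sketch of an instrumentation site for the tracepoint above; the stub macro keeps it compilable when the histogram options are off, and the IRQS_OFF/IRQS_ON values come from latency_hist.h added just below:

    #include <trace/events/hist.h>

    static void irqs_off_section(void)
    {
            /* Start a preempt/irqs-off latency measurement on this CPU. */
            trace_preemptirqsoff_hist(IRQS_OFF, 1);

            /* ... section running with interrupts disabled ... */

            /* Stop the measurement and feed the histogram. */
            trace_preemptirqsoff_hist(IRQS_ON, 0);
    }
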
14180diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h
14181new file mode 100644
14182index 0000000..7f70794
14183--- /dev/null
14184+++ b/include/trace/events/latency_hist.h
14185@@ -0,0 +1,29 @@
14186+#ifndef _LATENCY_HIST_H
14187+#define _LATENCY_HIST_H
14188+
14189+enum hist_action {
14190+ IRQS_ON,
14191+ PREEMPT_ON,
14192+ TRACE_STOP,
14193+ IRQS_OFF,
14194+ PREEMPT_OFF,
14195+ TRACE_START,
14196+};
14197+
14198+static char *actions[] = {
14199+ "IRQS_ON",
14200+ "PREEMPT_ON",
14201+ "TRACE_STOP",
14202+ "IRQS_OFF",
14203+ "PREEMPT_OFF",
14204+ "TRACE_START",
14205+};
14206+
14207+static inline char *getaction(int action)
14208+{
14209+ if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
14210+ return(actions[action]);
14211+ return("unknown");
14212+}
14213+
14214+#endif /* _LATENCY_HIST_H */
14215diff --git a/init/Kconfig b/init/Kconfig
14216index be8b7f5..d0590c7 100644
14217--- a/init/Kconfig
14218+++ b/init/Kconfig
14219@@ -579,7 +579,7 @@ config RCU_FANOUT_EXACT
14220
14221 config RCU_FAST_NO_HZ
14222 bool "Accelerate last non-dyntick-idle CPU's grace periods"
14223- depends on NO_HZ && SMP
14224+ depends on NO_HZ && SMP && !PREEMPT_RT_FULL
14225 default n
14226 help
14227 This option causes RCU to attempt to accelerate grace periods in
14228@@ -946,6 +946,7 @@ config RT_GROUP_SCHED
14229 bool "Group scheduling for SCHED_RR/FIFO"
14230 depends on EXPERIMENTAL
14231 depends on CGROUP_SCHED
14232+ depends on !PREEMPT_RT_FULL
14233 default n
14234 help
14235 This feature lets you explicitly allocate real CPU bandwidth
14236@@ -1504,6 +1505,7 @@ config SLUB
14237 config SLOB
14238 depends on EXPERT
14239 bool "SLOB (Simple Allocator)"
14240+ depends on !PREEMPT_RT_FULL
14241 help
14242 SLOB replaces the stock allocator with a drastically simpler
14243 allocator. SLOB is generally more space efficient but
14244diff --git a/init/Makefile b/init/Makefile
14245index 7bc47ee..88cf473 100644
14246--- a/init/Makefile
14247+++ b/init/Makefile
14248@@ -33,4 +33,4 @@ silent_chk_compile.h = :
14249 include/generated/compile.h: FORCE
14250 @$($(quiet)chk_compile.h)
14251 $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
14252- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
14253+ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
14254diff --git a/init/main.c b/init/main.c
14255index cee4b5c..2d80a11 100644
14256--- a/init/main.c
14257+++ b/init/main.c
14258@@ -70,6 +70,7 @@
14259 #include <linux/perf_event.h>
14260 #include <linux/file.h>
14261 #include <linux/ptrace.h>
14262+#include <linux/posix-timers.h>
14263
14264 #include <asm/io.h>
14265 #include <asm/bugs.h>
14266@@ -502,6 +503,7 @@ asmlinkage void __init start_kernel(void)
14267 setup_command_line(command_line);
14268 setup_nr_cpu_ids();
14269 setup_per_cpu_areas();
14270+ softirq_early_init();
14271 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
14272
14273 build_all_zonelists(NULL, NULL);
14274diff --git a/ipc/mqueue.c b/ipc/mqueue.c
14275index f3f40dc..4f7d959 100644
14276--- a/ipc/mqueue.c
14277+++ b/ipc/mqueue.c
14278@@ -921,12 +921,17 @@ static inline void pipelined_send(struct mqueue_inode_info *info,
14279 struct msg_msg *message,
14280 struct ext_wait_queue *receiver)
14281 {
14282+ /*
14283+ * Keep them in one critical section for PREEMPT_RT:
14284+ */
14285+ preempt_disable_rt();
14286 receiver->msg = message;
14287 list_del(&receiver->list);
14288 receiver->state = STATE_PENDING;
14289 wake_up_process(receiver->task);
14290 smp_wmb();
14291 receiver->state = STATE_READY;
14292+ preempt_enable_rt();
14293 }
14294
14295 /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
14296@@ -940,13 +945,18 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
14297 wake_up_interruptible(&info->wait_q);
14298 return;
14299 }
14300- if (msg_insert(sender->msg, info))
14301- return;
14302- list_del(&sender->list);
14303- sender->state = STATE_PENDING;
14304- wake_up_process(sender->task);
14305- smp_wmb();
14306- sender->state = STATE_READY;
14307+ /*
14308+ * Keep them in one critical section for PREEMPT_RT:
14309+ */
14310+ preempt_disable_rt();
14311+ if (!msg_insert(sender->msg, info)) {
14312+ list_del(&sender->list);
14313+ sender->state = STATE_PENDING;
14314+ wake_up_process(sender->task);
14315+ smp_wmb();
14316+ sender->state = STATE_READY;
14317+ }
14318+ preempt_enable_rt();
14319 }
14320
14321 SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
14322diff --git a/ipc/msg.c b/ipc/msg.c
14323index fede1d0..0b60596 100644
14324--- a/ipc/msg.c
14325+++ b/ipc/msg.c
14326@@ -259,12 +259,20 @@ static void expunge_all(struct msg_queue *msq, int res)
14327 while (tmp != &msq->q_receivers) {
14328 struct msg_receiver *msr;
14329
14330+ /*
14331+ * Make sure that the wakeup doesn't preempt
14332+ * this CPU prematurely. (on PREEMPT_RT)
14333+ */
14334+ preempt_disable_rt();
14335+
14336 msr = list_entry(tmp, struct msg_receiver, r_list);
14337 tmp = tmp->next;
14338 msr->r_msg = NULL;
14339 wake_up_process(msr->r_tsk);
14340 smp_mb();
14341 msr->r_msg = ERR_PTR(res);
14342+
14343+ preempt_enable_rt();
14344 }
14345 }
14346
14347@@ -614,6 +622,12 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
14348 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
14349 msr->r_msgtype, msr->r_mode)) {
14350
14351+ /*
14352+ * Make sure that the wakeup doesn't preempt
14353+ * this CPU prematurely. (on PREEMPT_RT)
14354+ */
14355+ preempt_disable_rt();
14356+
14357 list_del(&msr->r_list);
14358 if (msr->r_maxsize < msg->m_ts) {
14359 msr->r_msg = NULL;
14360@@ -627,9 +641,11 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
14361 wake_up_process(msr->r_tsk);
14362 smp_mb();
14363 msr->r_msg = msg;
14364+ preempt_enable_rt();
14365
14366 return 1;
14367 }
14368+ preempt_enable_rt();
14369 }
14370 }
14371 return 0;
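
The mqueue.c and msg.c hunks above follow the same pattern; condensed into a standalone sketch below, the waiter structure and state values are illustrative and only preempt_disable_rt()/preempt_enable_rt() plus the ordering are taken from the patch:

    #include <linux/sched.h>
    #include <linux/preempt.h>

    enum { STATE_NONE, STATE_PENDING, STATE_READY };

    struct waiter {                 /* illustrative stand-in */
            struct task_struct *task;
            int state;
    };

    static void hand_off(struct waiter *w)
    {
            /*
             * On PREEMPT_RT a wake_up_process() can preempt us immediately.
             * Keep the wakeup and the final STATE_READY store in one
             * non-preemptible section so the woken task never spins on a
             * stale STATE_PENDING.
             */
            preempt_disable_rt();
            w->state = STATE_PENDING;
            wake_up_process(w->task);
            smp_wmb();
            w->state = STATE_READY;
            preempt_enable_rt();
    }
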
14372diff --git a/ipc/sem.c b/ipc/sem.c
14373index 58d31f1..d7bdd84 100644
14374--- a/ipc/sem.c
14375+++ b/ipc/sem.c
14376@@ -461,6 +461,13 @@ undo:
14377 static void wake_up_sem_queue_prepare(struct list_head *pt,
14378 struct sem_queue *q, int error)
14379 {
14380+#ifdef CONFIG_PREEMPT_RT_BASE
14381+ struct task_struct *p = q->sleeper;
14382+ get_task_struct(p);
14383+ q->status = error;
14384+ wake_up_process(p);
14385+ put_task_struct(p);
14386+#else
14387 if (list_empty(pt)) {
14388 /*
14389 * Hold preempt off so that we don't get preempted and have the
14390@@ -472,6 +479,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
14391 q->pid = error;
14392
14393 list_add_tail(&q->simple_list, pt);
14394+#endif
14395 }
14396
14397 /**
14398@@ -485,6 +493,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
14399 */
14400 static void wake_up_sem_queue_do(struct list_head *pt)
14401 {
14402+#ifndef CONFIG_PREEMPT_RT_BASE
14403 struct sem_queue *q, *t;
14404 int did_something;
14405
14406@@ -497,6 +506,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
14407 }
14408 if (did_something)
14409 preempt_enable();
14410+#endif
14411 }
14412
14413 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
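
The RT branch above pins the sleeper's task_struct across the wakeup instead of batching wakeups with preemption disabled; the same idiom in isolation (sketch only):

    #include <linux/sched.h>

    static void wake_sleeper(struct task_struct *sleeper, int *status, int error)
    {
            /*
             * Hold a reference so the task cannot exit and be freed between
             * the status store (which lets it return) and wake_up_process().
             */
            get_task_struct(sleeper);
            *status = error;
            wake_up_process(sleeper);
            put_task_struct(sleeper);
    }
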
14414diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
14415index 44511d1..f60af90 100644
14416--- a/kernel/Kconfig.locks
14417+++ b/kernel/Kconfig.locks
14418@@ -222,4 +222,4 @@ endif
14419
14420 config MUTEX_SPIN_ON_OWNER
14421 def_bool y
14422- depends on SMP && !DEBUG_MUTEXES
14423+ depends on SMP && !DEBUG_MUTEXES && !PREEMPT_RT_FULL
14424diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
14425index 3f9c974..38cecfe 100644
14426--- a/kernel/Kconfig.preempt
14427+++ b/kernel/Kconfig.preempt
14428@@ -1,3 +1,16 @@
14429+config PREEMPT
14430+ bool
14431+ select PREEMPT_COUNT
14432+
14433+config PREEMPT_RT_BASE
14434+ bool
14435+ select PREEMPT
14436+
14437+config HAVE_PREEMPT_LAZY
14438+ bool
14439+
14440+config PREEMPT_LAZY
14441+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
14442
14443 choice
14444 prompt "Preemption Model"
14445@@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY
14446
14447 Select this if you are building a kernel for a desktop system.
14448
14449-config PREEMPT
14450+config PREEMPT__LL
14451 bool "Preemptible Kernel (Low-Latency Desktop)"
14452- select PREEMPT_COUNT
14453+ select PREEMPT
14454 select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
14455 help
14456 This option reduces the latency of the kernel by making
14457@@ -52,6 +65,22 @@ config PREEMPT
14458 embedded system with latency requirements in the milliseconds
14459 range.
14460
14461+config PREEMPT_RTB
14462+ bool "Preemptible Kernel (Basic RT)"
14463+ select PREEMPT_RT_BASE
14464+ help
14465+ This option is basically the same as (Low-Latency Desktop) but
14466+ enables changes which are preliminary for the fully preemptible
14467+ RT kernel.
14468+
14469+config PREEMPT_RT_FULL
14470+ bool "Fully Preemptible Kernel (RT)"
14471+ depends on IRQ_FORCED_THREADING
14472+ select PREEMPT_RT_BASE
14473+ select PREEMPT_RCU
14474+ help
14475+ Enables the complete set of PREEMPT_RT changes for a fully preemptible kernel.
14476+
14477 endchoice
14478
14479 config PREEMPT_COUNT
14480diff --git a/kernel/Makefile b/kernel/Makefile
14481index 6c072b6..2c7ab7f 100644
14482--- a/kernel/Makefile
14483+++ b/kernel/Makefile
14484@@ -7,10 +7,10 @@ obj-y = fork.o exec_domain.o panic.o printk.o \
14485 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
14486 signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
14487 rcupdate.o extable.o params.o posix-timers.o \
14488- kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
14489- hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
14490+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \
14491+ hrtimer.o nsproxy.o srcu.o semaphore.o \
14492 notifier.o ksysfs.o cred.o \
14493- async.o range.o groups.o lglock.o smpboot.o
14494+ async.o range.o groups.o lglock.o smpboot.o wait-simple.o
14495
14496 ifdef CONFIG_FUNCTION_TRACER
14497 # Do not trace debug files and internal ftrace files
14498@@ -32,7 +32,11 @@ obj-$(CONFIG_FREEZER) += freezer.o
14499 obj-$(CONFIG_PROFILING) += profile.o
14500 obj-$(CONFIG_STACKTRACE) += stacktrace.o
14501 obj-y += time/
14502+ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
14503+obj-y += mutex.o
14504 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
14505+obj-y += rwsem.o
14506+endif
14507 obj-$(CONFIG_LOCKDEP) += lockdep.o
14508 ifeq ($(CONFIG_PROC_FS),y)
14509 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
14510@@ -44,6 +48,7 @@ endif
14511 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
14512 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
14513 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
14514+obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
14515 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
14516 obj-$(CONFIG_SMP) += smp.o
14517 ifneq ($(CONFIG_SMP),y)
14518diff --git a/kernel/cpu.c b/kernel/cpu.c
14519index 3046a50..d44dea3 100644
14520--- a/kernel/cpu.c
14521+++ b/kernel/cpu.c
14522@@ -63,6 +63,274 @@ static struct {
14523 .refcount = 0,
14524 };
14525
14526+/**
14527+ * hotplug_pcp - per cpu hotplug descriptor
14528+ * @unplug: set when pin_current_cpu() needs to sync tasks
14529+ * @sync_tsk: the task that waits for tasks to finish pinned sections
14530+ * @refcount: counter of tasks in pinned sections
14531+ * @grab_lock: set when the tasks entering pinned sections should wait
14532+ * @synced: notifier for @sync_tsk to tell cpu_down it's finished
14533+ * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
14534+ * @mutex_init: zero if the mutex hasn't been initialized yet.
14535+ *
14536+ * Although @unplug and @sync_tsk may point to the same task, the @unplug
14537+ * is used as a flag and still exists after @sync_tsk has exited and
14538+ * @sync_tsk has been set to NULL.
14539+ */
14540+struct hotplug_pcp {
14541+ struct task_struct *unplug;
14542+ struct task_struct *sync_tsk;
14543+ int refcount;
14544+ int grab_lock;
14545+ struct completion synced;
14546+#ifdef CONFIG_PREEMPT_RT_FULL
14547+ spinlock_t lock;
14548+#else
14549+ struct mutex mutex;
14550+#endif
14551+ int mutex_init;
14552+};
14553+
14554+#ifdef CONFIG_PREEMPT_RT_FULL
14555+# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
14556+# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
14557+#else
14558+# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
14559+# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
14560+#endif
14561+
14562+static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
14563+
14564+/**
14565+ * pin_current_cpu - Prevent the current cpu from being unplugged
14566+ *
14567+ * Lightweight version of get_online_cpus() to prevent cpu from being
14568+ * unplugged when code runs in a migration disabled region.
14569+ *
14570+ * Must be called with preemption disabled (preempt_count = 1)!
14571+ */
14572+void pin_current_cpu(void)
14573+{
14574+ struct hotplug_pcp *hp;
14575+ int force = 0;
14576+
14577+retry:
14578+ hp = &__get_cpu_var(hotplug_pcp);
14579+
14580+ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
14581+ hp->unplug == current || (current->flags & PF_STOMPER)) {
14582+ hp->refcount++;
14583+ return;
14584+ }
14585+
14586+ if (hp->grab_lock) {
14587+ preempt_enable();
14588+ hotplug_lock(hp);
14589+ hotplug_unlock(hp);
14590+ } else {
14591+ preempt_enable();
14592+ /*
14593+ * Try to push this task off this CPU.
14594+ */
14595+ if (!migrate_me()) {
14596+ preempt_disable();
14597+ hp = &__get_cpu_var(hotplug_pcp);
14598+ if (!hp->grab_lock) {
14599+ /*
14600+ * Just let it continue; it's already pinned
14601+ * or about to sleep.
14602+ */
14603+ force = 1;
14604+ goto retry;
14605+ }
14606+ preempt_enable();
14607+ }
14608+ }
14609+ preempt_disable();
14610+ goto retry;
14611+}
14612+
14613+/**
14614+ * unpin_current_cpu - Allow unplug of current cpu
14615+ *
14616+ * Must be called with preemption or interrupts disabled!
14617+ */
14618+void unpin_current_cpu(void)
14619+{
14620+ struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
14621+
14622+ WARN_ON(hp->refcount <= 0);
14623+
14624+ /* This is safe. sync_unplug_thread is pinned to this cpu */
14625+ if (!--hp->refcount && hp->unplug && hp->unplug != current &&
14626+ !(current->flags & PF_STOMPER))
14627+ wake_up_process(hp->unplug);
14628+}
14629+
14630+static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
14631+{
14632+ set_current_state(TASK_UNINTERRUPTIBLE);
14633+ while (hp->refcount) {
14634+ schedule_preempt_disabled();
14635+ set_current_state(TASK_UNINTERRUPTIBLE);
14636+ }
14637+}
14638+
14639+static int sync_unplug_thread(void *data)
14640+{
14641+ struct hotplug_pcp *hp = data;
14642+
14643+ preempt_disable();
14644+ hp->unplug = current;
14645+ wait_for_pinned_cpus(hp);
14646+
14647+ /*
14648+ * This thread will synchronize the cpu_down() with threads
14649+ * that have pinned the CPU. When the pinned CPU count reaches
14650+ * zero, we inform the cpu_down code to continue to the next step.
14651+ */
14652+ set_current_state(TASK_UNINTERRUPTIBLE);
14653+ preempt_enable();
14654+ complete(&hp->synced);
14655+
14656+ /*
14657+ * If all succeeds, the next step will need tasks to wait till
14658+ * the CPU is offline before continuing. To do this, the grab_lock
14659+ * is set and tasks going into pin_current_cpu() will block on the
14660+ * mutex. But we still need to wait for those that are already in
14661+ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
14662+ * will kick this thread out.
14663+ */
14664+ while (!hp->grab_lock && !kthread_should_stop()) {
14665+ schedule();
14666+ set_current_state(TASK_UNINTERRUPTIBLE);
14667+ }
14668+
14669+ /* Make sure grab_lock is seen before we see a stale completion */
14670+ smp_mb();
14671+
14672+ /*
14673+ * Now just before cpu_down() enters stop machine, we need to make
14674+ * sure all tasks that are in pinned CPU sections are out, and new
14675+ * tasks will now grab the lock, keeping them from entering pinned
14676+ * CPU sections.
14677+ */
14678+ if (!kthread_should_stop()) {
14679+ preempt_disable();
14680+ wait_for_pinned_cpus(hp);
14681+ preempt_enable();
14682+ complete(&hp->synced);
14683+ }
14684+
14685+ set_current_state(TASK_UNINTERRUPTIBLE);
14686+ while (!kthread_should_stop()) {
14687+ schedule();
14688+ set_current_state(TASK_UNINTERRUPTIBLE);
14689+ }
14690+ set_current_state(TASK_RUNNING);
14691+
14692+ /*
14693+ * Force this thread off this CPU as it's going down and
14694+ * we don't want any more work on this CPU.
14695+ */
14696+ current->flags &= ~PF_THREAD_BOUND;
14697+ do_set_cpus_allowed(current, cpu_present_mask);
14698+ migrate_me();
14699+ return 0;
14700+}
14701+
14702+static void __cpu_unplug_sync(struct hotplug_pcp *hp)
14703+{
14704+ wake_up_process(hp->sync_tsk);
14705+ wait_for_completion(&hp->synced);
14706+}
14707+
14708+/*
14709+ * Start the sync_unplug_thread on the target cpu and wait for it to
14710+ * complete.
14711+ */
14712+static int cpu_unplug_begin(unsigned int cpu)
14713+{
14714+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
14715+ int err;
14716+
14717+ /* Protected by cpu_hotplug.lock */
14718+ if (!hp->mutex_init) {
14719+#ifdef CONFIG_PREEMPT_RT_FULL
14720+ spin_lock_init(&hp->lock);
14721+#else
14722+ mutex_init(&hp->mutex);
14723+#endif
14724+ hp->mutex_init = 1;
14725+ }
14726+
14727+ /* Inform the scheduler to migrate tasks off this CPU */
14728+ tell_sched_cpu_down_begin(cpu);
14729+
14730+ init_completion(&hp->synced);
14731+
14732+ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
14733+ if (IS_ERR(hp->sync_tsk)) {
14734+ err = PTR_ERR(hp->sync_tsk);
14735+ hp->sync_tsk = NULL;
14736+ return err;
14737+ }
14738+ kthread_bind(hp->sync_tsk, cpu);
14739+
14740+ /*
14741+ * Wait for tasks to get out of the pinned sections,
14742+ * it's still OK if new tasks enter. Some CPU notifiers will
14743+ * wait for tasks that are going to enter these sections and
14744+ * we must not have them block.
14745+ */
14746+ __cpu_unplug_sync(hp);
14747+
14748+ return 0;
14749+}
14750+
14751+static void cpu_unplug_sync(unsigned int cpu)
14752+{
14753+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
14754+
14755+ init_completion(&hp->synced);
14756+ /* The completion needs to be initialized before setting grab_lock */
14757+ smp_wmb();
14758+
14759+ /* Grab the mutex before setting grab_lock */
14760+ hotplug_lock(hp);
14761+ hp->grab_lock = 1;
14762+
14763+ /*
14764+ * The CPU notifiers have been completed.
14765+ * Wait for tasks to get out of pinned CPU sections and have new
14766+ * tasks block until the CPU is completely down.
14767+ */
14768+ __cpu_unplug_sync(hp);
14769+
14770+ /* All done with the sync thread */
14771+ kthread_stop(hp->sync_tsk);
14772+ hp->sync_tsk = NULL;
14773+}
14774+
14775+static void cpu_unplug_done(unsigned int cpu)
14776+{
14777+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
14778+
14779+ hp->unplug = NULL;
14780+ /* Let all tasks know cpu unplug is finished before cleaning up */
14781+ smp_wmb();
14782+
14783+ if (hp->sync_tsk)
14784+ kthread_stop(hp->sync_tsk);
14785+
14786+ if (hp->grab_lock) {
14787+ hotplug_unlock(hp);
14788+ /* protected by cpu_hotplug.lock */
14789+ hp->grab_lock = 0;
14790+ }
14791+ tell_sched_cpu_down_done(cpu);
14792+}
14793+
14794 void get_online_cpus(void)
14795 {
14796 might_sleep();
14797@@ -79,15 +347,14 @@ void put_online_cpus(void)
14798 {
14799 if (cpu_hotplug.active_writer == current)
14800 return;
14801- mutex_lock(&cpu_hotplug.lock);
14802
14803+ mutex_lock(&cpu_hotplug.lock);
14804 if (WARN_ON(!cpu_hotplug.refcount))
14805 cpu_hotplug.refcount++; /* try to fix things up */
14806
14807 if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
14808 wake_up_process(cpu_hotplug.active_writer);
14809 mutex_unlock(&cpu_hotplug.lock);
14810-
14811 }
14812 EXPORT_SYMBOL_GPL(put_online_cpus);
14813
14814@@ -260,13 +527,14 @@ static int __ref take_cpu_down(void *_param)
14815 /* Requires cpu_add_remove_lock to be held */
14816 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
14817 {
14818- int err, nr_calls = 0;
14819+ int mycpu, err, nr_calls = 0;
14820 void *hcpu = (void *)(long)cpu;
14821 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
14822 struct take_cpu_down_param tcd_param = {
14823 .mod = mod,
14824 .hcpu = hcpu,
14825 };
14826+ cpumask_var_t cpumask;
14827
14828 if (num_online_cpus() == 1)
14829 return -EBUSY;
14830@@ -274,7 +542,26 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
14831 if (!cpu_online(cpu))
14832 return -EINVAL;
14833
14834+ /* Move the downtaker off the unplug cpu */
14835+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
14836+ return -ENOMEM;
14837+ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
14838+ set_cpus_allowed_ptr(current, cpumask);
14839+ free_cpumask_var(cpumask);
14840+ migrate_disable();
14841+ mycpu = smp_processor_id();
14842+ if (mycpu == cpu) {
14843+ printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
14844+ migrate_enable();
14845+ return -EBUSY;
14846+ }
14847+
14848 cpu_hotplug_begin();
14849+ err = cpu_unplug_begin(cpu);
14850+ if (err) {
14851+ printk("cpu_unplug_begin(%d) failed\n", cpu);
14852+ goto out_cancel;
14853+ }
14854
14855 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
14856 if (err) {
14857@@ -286,6 +573,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
14858 }
14859 smpboot_park_threads(cpu);
14860
14861+ /* Notifiers are done. Don't let any more tasks pin this CPU. */
14862+ cpu_unplug_sync(cpu);
14863+
14864 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
14865 if (err) {
14866 /* CPU didn't die: tell everyone. Can't complain. */
14867@@ -314,6 +604,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
14868 check_for_tasks(cpu);
14869
14870 out_release:
14871+ cpu_unplug_done(cpu);
14872+out_cancel:
14873+ migrate_enable();
14874 cpu_hotplug_done();
14875 if (!err)
14876 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
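
How a caller might use the pinning helpers added above; in this tree they are normally reached through migrate_disable()/migrate_enable() rather than called directly, and the declarations are assumed to live in <linux/cpu.h>. A sketch:

    #include <linux/cpu.h>
    #include <linux/preempt.h>

    static void per_cpu_critical_section(void)
    {
            preempt_disable();      /* pin_current_cpu() requires preempt_count == 1 */
            pin_current_cpu();      /* block cpu_down() of this CPU for the section */

            /* ... work that must complete on a CPU that stays online ... */

            unpin_current_cpu();    /* allow a pending unplug to proceed */
            preempt_enable();
    }
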
14877diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
14878index 14ff484..399dba6 100644
14879--- a/kernel/debug/kdb/kdb_io.c
14880+++ b/kernel/debug/kdb/kdb_io.c
14881@@ -554,7 +554,6 @@ int vkdb_printf(const char *fmt, va_list ap)
14882 int linecount;
14883 int colcount;
14884 int logging, saved_loglevel = 0;
14885- int saved_trap_printk;
14886 int got_printf_lock = 0;
14887 int retlen = 0;
14888 int fnd, len;
14889@@ -565,8 +564,6 @@ int vkdb_printf(const char *fmt, va_list ap)
14890 unsigned long uninitialized_var(flags);
14891
14892 preempt_disable();
14893- saved_trap_printk = kdb_trap_printk;
14894- kdb_trap_printk = 0;
14895
14896 /* Serialize kdb_printf if multiple cpus try to write at once.
14897 * But if any cpu goes recursive in kdb, just print the output,
14898@@ -833,7 +830,6 @@ kdb_print_out:
14899 } else {
14900 __release(kdb_printf_lock);
14901 }
14902- kdb_trap_printk = saved_trap_printk;
14903 preempt_enable();
14904 return retlen;
14905 }
14906@@ -843,9 +839,11 @@ int kdb_printf(const char *fmt, ...)
14907 va_list ap;
14908 int r;
14909
14910+ kdb_trap_printk++;
14911 va_start(ap, fmt);
14912 r = vkdb_printf(fmt, ap);
14913 va_end(ap);
14914+ kdb_trap_printk--;
14915
14916 return r;
14917 }
14918diff --git a/kernel/events/core.c b/kernel/events/core.c
14919index 0600d3b..45f7b3e 100644
14920--- a/kernel/events/core.c
14921+++ b/kernel/events/core.c
14922@@ -5638,6 +5638,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
14923
14924 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
14925 hwc->hrtimer.function = perf_swevent_hrtimer;
14926+ hwc->hrtimer.irqsafe = 1;
14927
14928 /*
14929 * Since hrtimers have a fixed rate, we can do a static freq->period
14930diff --git a/kernel/exit.c b/kernel/exit.c
14931index b4df219..8fb4a48 100644
14932--- a/kernel/exit.c
14933+++ b/kernel/exit.c
14934@@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk)
14935 * Do this under ->siglock, we can race with another thread
14936 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
14937 */
14938- flush_sigqueue(&tsk->pending);
14939+ flush_task_sigqueue(tsk);
14940 tsk->sighand = NULL;
14941 spin_unlock(&sighand->siglock);
14942
14943diff --git a/kernel/fork.c b/kernel/fork.c
14944index 5630e52..b814a02 100644
14945--- a/kernel/fork.c
14946+++ b/kernel/fork.c
14947@@ -93,7 +93,7 @@ int max_threads; /* tunable limit on nr_threads */
14948
14949 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
14950
14951-__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
14952+DEFINE_RWLOCK(tasklist_lock); /* outer */
14953
14954 #ifdef CONFIG_PROVE_RCU
14955 int lockdep_tasklist_lock_is_held(void)
14956@@ -229,7 +229,9 @@ static inline void put_signal_struct(struct signal_struct *sig)
14957 if (atomic_dec_and_test(&sig->sigcnt))
14958 free_signal_struct(sig);
14959 }
14960-
14961+#ifdef CONFIG_PREEMPT_RT_BASE
14962+static
14963+#endif
14964 void __put_task_struct(struct task_struct *tsk)
14965 {
14966 WARN_ON(!tsk->exit_state);
14967@@ -244,7 +246,18 @@ void __put_task_struct(struct task_struct *tsk)
14968 if (!profile_handoff_task(tsk))
14969 free_task(tsk);
14970 }
14971+#ifndef CONFIG_PREEMPT_RT_BASE
14972 EXPORT_SYMBOL_GPL(__put_task_struct);
14973+#else
14974+void __put_task_struct_cb(struct rcu_head *rhp)
14975+{
14976+ struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
14977+
14978+ __put_task_struct(tsk);
14979+
14980+}
14981+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
14982+#endif
14983
14984 void __init __weak arch_task_cache_init(void) { }
14985
14986@@ -603,6 +616,19 @@ void __mmdrop(struct mm_struct *mm)
14987 }
14988 EXPORT_SYMBOL_GPL(__mmdrop);
14989
14990+#ifdef CONFIG_PREEMPT_RT_BASE
14991+/*
14992+ * RCU callback for delayed mm drop. Not strictly rcu, but we don't
14993+ * want another facility to make this work.
14994+ */
14995+void __mmdrop_delayed(struct rcu_head *rhp)
14996+{
14997+ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
14998+
14999+ __mmdrop(mm);
15000+}
15001+#endif
15002+
15003 /*
15004 * Decrement the use count and release all resources for an mm.
15005 */
15006@@ -1112,6 +1138,9 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
15007 */
15008 static void posix_cpu_timers_init(struct task_struct *tsk)
15009 {
15010+#ifdef CONFIG_PREEMPT_RT_BASE
15011+ tsk->posix_timer_list = NULL;
15012+#endif
15013 tsk->cputime_expires.prof_exp = 0;
15014 tsk->cputime_expires.virt_exp = 0;
15015 tsk->cputime_expires.sched_exp = 0;
15016@@ -1230,6 +1259,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
15017 spin_lock_init(&p->alloc_lock);
15018
15019 init_sigpending(&p->pending);
15020+ p->sigqueue_cache = NULL;
15021
15022 p->utime = p->stime = p->gtime = 0;
15023 p->utimescaled = p->stimescaled = 0;
15024@@ -1284,6 +1314,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
15025 p->hardirq_context = 0;
15026 p->softirq_context = 0;
15027 #endif
15028+#ifdef CONFIG_PREEMPT_RT_FULL
15029+ p->pagefault_disabled = 0;
15030+#endif
15031 #ifdef CONFIG_LOCKDEP
15032 p->lockdep_depth = 0; /* no locks held yet */
15033 p->curr_chain_key = 0;
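
The __put_task_struct_cb()/__mmdrop_delayed() additions above push the final free into an RCU callback so it never runs from a context that cannot take sleeping locks on RT. The generic shape of that pattern, with an illustrative type:

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct widget {                 /* illustrative */
            struct rcu_head rcu;
            /* ... payload ... */
    };

    static void widget_free_rcu(struct rcu_head *rhp)
    {
            struct widget *w = container_of(rhp, struct widget, rcu);

            kfree(w);               /* runs from RCU callback context */
    }

    static void widget_drop(struct widget *w)
    {
            /* Defer the free instead of doing it in the caller's context. */
            call_rcu(&w->rcu, widget_free_rcu);
    }
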
15034diff --git a/kernel/futex.c b/kernel/futex.c
15035index 8879430..473c3c4 100644
15036--- a/kernel/futex.c
15037+++ b/kernel/futex.c
15038@@ -568,7 +568,9 @@ void exit_pi_state_list(struct task_struct *curr)
15039 * task still owns the PI-state:
15040 */
15041 if (head->next != next) {
15042+ raw_spin_unlock_irq(&curr->pi_lock);
15043 spin_unlock(&hb->lock);
15044+ raw_spin_lock_irq(&curr->pi_lock);
15045 continue;
15046 }
15047
15048@@ -1442,6 +1444,16 @@ retry_private:
15049 requeue_pi_wake_futex(this, &key2, hb2);
15050 drop_count++;
15051 continue;
15052+ } else if (ret == -EAGAIN) {
15053+ /*
15054+ * Waiter was woken by timeout or
15055+ * signal and has set pi_blocked_on to
15056+ * PI_WAKEUP_INPROGRESS before we
15057+ * tried to enqueue it on the rtmutex.
15058+ */
15059+ this->pi_state = NULL;
15060+ free_pi_state(pi_state);
15061+ continue;
15062 } else if (ret) {
15063 /* -EDEADLK */
15064 this->pi_state = NULL;
15065@@ -2286,7 +2298,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
15066 struct hrtimer_sleeper timeout, *to = NULL;
15067 struct rt_mutex_waiter rt_waiter;
15068 struct rt_mutex *pi_mutex = NULL;
15069- struct futex_hash_bucket *hb;
15070+ struct futex_hash_bucket *hb, *hb2;
15071 union futex_key key2 = FUTEX_KEY_INIT;
15072 struct futex_q q = futex_q_init;
15073 int res, ret;
15074@@ -2311,8 +2323,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
15075 * The waiter is allocated on our stack, manipulated by the requeue
15076 * code while we sleep on uaddr.
15077 */
15078- debug_rt_mutex_init_waiter(&rt_waiter);
15079- rt_waiter.task = NULL;
15080+ rt_mutex_init_waiter(&rt_waiter, false);
15081
15082 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
15083 if (unlikely(ret != 0))
15084@@ -2333,20 +2344,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
15085 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
15086 futex_wait_queue_me(hb, &q, to);
15087
15088- spin_lock(&hb->lock);
15089- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
15090- spin_unlock(&hb->lock);
15091- if (ret)
15092- goto out_put_keys;
15093+ /*
15094+ * On RT we must avoid races with requeue and trying to block
15095+ * on two mutexes (hb->lock and uaddr2's rtmutex) by
15096+ * serializing access to pi_blocked_on with pi_lock.
15097+ */
15098+ raw_spin_lock_irq(&current->pi_lock);
15099+ if (current->pi_blocked_on) {
15100+ /*
15101+ * We have been requeued or are in the process of
15102+ * being requeued.
15103+ */
15104+ raw_spin_unlock_irq(&current->pi_lock);
15105+ } else {
15106+ /*
15107+ * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
15108+ * prevents a concurrent requeue from moving us to the
15109+ * uaddr2 rtmutex. After that we can safely acquire
15110+ * (and possibly block on) hb->lock.
15111+ */
15112+ current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
15113+ raw_spin_unlock_irq(&current->pi_lock);
15114+
15115+ spin_lock(&hb->lock);
15116+
15117+ /*
15118+ * Clean up pi_blocked_on. We might leak it otherwise
15119+ * when we succeeded with the hb->lock in the fast
15120+ * path.
15121+ */
15122+ raw_spin_lock_irq(&current->pi_lock);
15123+ current->pi_blocked_on = NULL;
15124+ raw_spin_unlock_irq(&current->pi_lock);
15125+
15126+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
15127+ spin_unlock(&hb->lock);
15128+ if (ret)
15129+ goto out_put_keys;
15130+ }
15131
15132 /*
15133- * In order for us to be here, we know our q.key == key2, and since
15134- * we took the hb->lock above, we also know that futex_requeue() has
15135- * completed and we no longer have to concern ourselves with a wakeup
15136- * race with the atomic proxy lock acquisition by the requeue code. The
15137- * futex_requeue dropped our key1 reference and incremented our key2
15138- * reference count.
15139+ * In order to be here, we have either been requeued, are in
15140+ * the process of being requeued, or requeue successfully
15141+ * acquired uaddr2 on our behalf. If pi_blocked_on was
15142+ * non-null above, we may be racing with a requeue. Do not
15143+ * rely on q->lock_ptr to be hb2->lock until after blocking on
15144+ * hb->lock or hb2->lock. The futex_requeue dropped our key1
15145+ * reference and incremented our key2 reference count.
15146 */
15147+ hb2 = hash_futex(&key2);
15148
15149 /* Check if the requeue code acquired the second futex for us. */
15150 if (!q.rt_waiter) {
15151@@ -2355,9 +2401,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
15152 * did a lock-steal - fix up the PI-state in that case.
15153 */
15154 if (q.pi_state && (q.pi_state->owner != current)) {
15155- spin_lock(q.lock_ptr);
15156+ spin_lock(&hb2->lock);
15157+ BUG_ON(&hb2->lock != q.lock_ptr);
15158 ret = fixup_pi_state_owner(uaddr2, &q, current);
15159- spin_unlock(q.lock_ptr);
15160+ spin_unlock(&hb2->lock);
15161 }
15162 } else {
15163 /*
15164@@ -2370,7 +2417,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
15165 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
15166 debug_rt_mutex_free_waiter(&rt_waiter);
15167
15168- spin_lock(q.lock_ptr);
15169+ spin_lock(&hb2->lock);
15170+ BUG_ON(&hb2->lock != q.lock_ptr);
15171 /*
15172 * Fixup the pi_state owner and possibly acquire the lock if we
15173 * haven't already.
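
Condensed from the hunk above, the serialization amounts to publishing a sentinel under pi_lock before blocking on hb->lock, so a concurrent requeue either backs off with -EAGAIN or completes first. Helper and parameter names below are hypothetical; PI_WAKEUP_INPROGRESS is the rtmutex sentinel this patch introduces:

    static void announce_blocking_on_hb(struct task_struct *tsk, spinlock_t *hb_lock)
    {
            raw_spin_lock_irq(&tsk->pi_lock);
            if (tsk->pi_blocked_on) {
                    /* A requeue is already in flight; leave hb state alone. */
                    raw_spin_unlock_irq(&tsk->pi_lock);
                    return;
            }
            tsk->pi_blocked_on = PI_WAKEUP_INPROGRESS;      /* sentinel from rtmutex */
            raw_spin_unlock_irq(&tsk->pi_lock);

            spin_lock(hb_lock);     /* may block on RT; requeue now backs off */

            raw_spin_lock_irq(&tsk->pi_lock);
            tsk->pi_blocked_on = NULL;      /* don't leak the sentinel */
            raw_spin_unlock_irq(&tsk->pi_lock);
    }
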
15174diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
15175index e4cee8d..c2c58f9 100644
15176--- a/kernel/hrtimer.c
15177+++ b/kernel/hrtimer.c
15178@@ -49,6 +49,7 @@
15179 #include <asm/uaccess.h>
15180
15181 #include <trace/events/timer.h>
15182+#include <trace/events/hist.h>
15183
15184 /*
15185 * The timer bases:
15186@@ -589,8 +590,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
15187 * When the callback is running, we do not reprogram the clock event
15188 * device. The timer callback is either running on a different CPU or
15189 * the callback is executed in the hrtimer_interrupt context. The
15190- * reprogramming is handled either by the softirq, which called the
15191- * callback or at the end of the hrtimer_interrupt.
15192+ * reprogramming is handled at the end of the hrtimer_interrupt.
15193 */
15194 if (hrtimer_callback_running(timer))
15195 return 0;
15196@@ -625,6 +625,9 @@ static int hrtimer_reprogram(struct hrtimer *timer,
15197 return res;
15198 }
15199
15200+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
15201+static int hrtimer_rt_defer(struct hrtimer *timer);
15202+
15203 /*
15204 * Initialize the high resolution related parts of cpu_base
15205 */
15206@@ -641,9 +644,18 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
15207 * and expiry check is done in the hrtimer_interrupt or in the softirq.
15208 */
15209 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
15210- struct hrtimer_clock_base *base)
15211+ struct hrtimer_clock_base *base,
15212+ int wakeup)
15213 {
15214- return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
15215+ if (!(base->cpu_base->hres_active && hrtimer_reprogram(timer, base)))
15216+ return 0;
15217+ if (!wakeup)
15218+ return -ETIME;
15219+#ifdef CONFIG_PREEMPT_RT_BASE
15220+ if (!hrtimer_rt_defer(timer))
15221+ return -ETIME;
15222+#endif
15223+ return 1;
15224 }
15225
15226 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
15227@@ -724,12 +736,18 @@ static inline int hrtimer_switch_to_hres(void) { return 0; }
15228 static inline void
15229 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
15230 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
15231- struct hrtimer_clock_base *base)
15232+ struct hrtimer_clock_base *base,
15233+ int wakeup)
15234 {
15235 return 0;
15236 }
15237 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
15238 static inline void retrigger_next_event(void *arg) { }
15239+static inline int hrtimer_reprogram(struct hrtimer *timer,
15240+ struct hrtimer_clock_base *base)
15241+{
15242+ return 0;
15243+}
15244
15245 #endif /* CONFIG_HIGH_RES_TIMERS */
15246
15247@@ -844,6 +862,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
15248 }
15249 EXPORT_SYMBOL_GPL(hrtimer_forward);
15250
15251+#ifdef CONFIG_PREEMPT_RT_BASE
15252+# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
15253+
15254+/**
15255+ * hrtimer_wait_for_timer - Wait for a running timer
15256+ *
15257+ * @timer: timer to wait for
15258+ *
15259+ * The function waits on the waitqueue of the timer base while the
15260+ * timer's callback function is currently executing. The
15261+ * waitqueue is woken up after the timer callback function has
15262+ * finished execution.
15263+ */
15264+void hrtimer_wait_for_timer(const struct hrtimer *timer)
15265+{
15266+ struct hrtimer_clock_base *base = timer->base;
15267+
15268+ if (base && base->cpu_base && !timer->irqsafe)
15269+ wait_event(base->cpu_base->wait,
15270+ !(timer->state & HRTIMER_STATE_CALLBACK));
15271+}
15272+
15273+#else
15274+# define wake_up_timer_waiters(b) do { } while (0)
15275+#endif
15276+
15277 /*
15278 * enqueue_hrtimer - internal function to (re)start a timer
15279 *
15280@@ -887,6 +931,11 @@ static void __remove_hrtimer(struct hrtimer *timer,
15281 if (!(timer->state & HRTIMER_STATE_ENQUEUED))
15282 goto out;
15283
15284+ if (unlikely(!list_empty(&timer->cb_entry))) {
15285+ list_del_init(&timer->cb_entry);
15286+ goto out;
15287+ }
15288+
15289 next_timer = timerqueue_getnext(&base->active);
15290 timerqueue_del(&base->active, &timer->node);
15291 if (&timer->node == next_timer) {
15292@@ -971,6 +1020,17 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
15293 #endif
15294 }
15295
15296+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
15297+ {
15298+ ktime_t now = new_base->get_time();
15299+
15300+ if (ktime_to_ns(tim) < ktime_to_ns(now))
15301+ timer->praecox = now;
15302+ else
15303+ timer->praecox = ktime_set(0, 0);
15304+ }
15305+#endif
15306+
15307 hrtimer_set_expires_range_ns(timer, tim, delta_ns);
15308
15309 timer_stats_hrtimer_set_start_info(timer);
15310@@ -983,9 +1043,19 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
15311 *
15312 * XXX send_remote_softirq() ?
15313 */
15314- if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
15315- && hrtimer_enqueue_reprogram(timer, new_base)) {
15316- if (wakeup) {
15317+ if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) {
15318+ ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
15319+ if (ret < 0) {
15320+ /*
15321+ * In case we failed to reprogram the timer (mostly
15322+ * because our current timer has already elapsed),
15323+ * remove it again and report a failure. This avoids
15324+ * stale base->first entries.
15325+ */
15326+ debug_deactivate(timer);
15327+ __remove_hrtimer(timer, new_base,
15328+ timer->state & HRTIMER_STATE_CALLBACK, 0);
15329+ } else if (ret > 0) {
15330 /*
15331 * We need to drop cpu_base->lock to avoid a
15332 * lock ordering issue vs. rq->lock.
15333@@ -993,9 +1063,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
15334 raw_spin_unlock(&new_base->cpu_base->lock);
15335 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
15336 local_irq_restore(flags);
15337- return ret;
15338- } else {
15339- __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
15340+ return 0;
15341 }
15342 }
15343
15344@@ -1083,7 +1151,7 @@ int hrtimer_cancel(struct hrtimer *timer)
15345
15346 if (ret >= 0)
15347 return ret;
15348- cpu_relax();
15349+ hrtimer_wait_for_timer(timer);
15350 }
15351 }
15352 EXPORT_SYMBOL_GPL(hrtimer_cancel);
15353@@ -1162,6 +1230,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
15354
15355 base = hrtimer_clockid_to_base(clock_id);
15356 timer->base = &cpu_base->clock_base[base];
15357+ INIT_LIST_HEAD(&timer->cb_entry);
15358 timerqueue_init(&timer->node);
15359
15360 #ifdef CONFIG_TIMER_STATS
15361@@ -1245,6 +1314,126 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
15362 timer->state &= ~HRTIMER_STATE_CALLBACK;
15363 }
15364
15365+static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
15366+
15367+#ifdef CONFIG_PREEMPT_RT_BASE
15368+static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
15369+ struct hrtimer_clock_base *base)
15370+{
15371+ /*
15372+ * Note, we clear the callback flag before we requeue the
15373+ * timer otherwise we trigger the callback_running() check
15374+ * in hrtimer_reprogram().
15375+ */
15376+ timer->state &= ~HRTIMER_STATE_CALLBACK;
15377+
15378+ if (restart != HRTIMER_NORESTART) {
15379+ BUG_ON(hrtimer_active(timer));
15380+ /*
15381+ * Enqueue the timer, if it's the leftmost timer then
15382+ * we need to reprogram it.
15383+ */
15384+ if (!enqueue_hrtimer(timer, base))
15385+ return;
15386+
15387+#ifndef CONFIG_HIGH_RES_TIMERS
15388+ }
15389+#else
15390+ if (base->cpu_base->hres_active &&
15391+ hrtimer_reprogram(timer, base))
15392+ goto requeue;
15393+
15394+ } else if (hrtimer_active(timer)) {
15395+ /*
15396+ * If the timer was rearmed on another CPU, reprogram
15397+ * the event device.
15398+ */
15399+ if (&timer->node == base->active.next &&
15400+ base->cpu_base->hres_active &&
15401+ hrtimer_reprogram(timer, base))
15402+ goto requeue;
15403+ }
15404+ return;
15405+
15406+requeue:
15407+ /*
15408+ * Timer is expired. Thus move it from tree to pending list
15409+ * again.
15410+ */
15411+ __remove_hrtimer(timer, base, timer->state, 0);
15412+ list_add_tail(&timer->cb_entry, &base->expired);
15413+#endif
15414+}
15415+
15416+/*
15417+ * The changes in mainline which removed the callback modes from
15418+ * hrtimer are not yet working with -rt. The non wakeup_process()
15419+ * based callbacks which involve sleeping locks need to be treated
15420+ * separately.
15421+ */
15422+static void hrtimer_rt_run_pending(void)
15423+{
15424+ enum hrtimer_restart (*fn)(struct hrtimer *);
15425+ struct hrtimer_cpu_base *cpu_base;
15426+ struct hrtimer_clock_base *base;
15427+ struct hrtimer *timer;
15428+ int index, restart;
15429+
15430+ local_irq_disable();
15431+ cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
15432+
15433+ raw_spin_lock(&cpu_base->lock);
15434+
15435+ for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
15436+ base = &cpu_base->clock_base[index];
15437+
15438+ while (!list_empty(&base->expired)) {
15439+ timer = list_first_entry(&base->expired,
15440+ struct hrtimer, cb_entry);
15441+
15442+ /*
15443+ * Same as the __run_hrtimer() function above, except
15444+ * that we run with interrupts enabled.
15445+ */
15446+ debug_hrtimer_deactivate(timer);
15447+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
15448+ timer_stats_account_hrtimer(timer);
15449+ fn = timer->function;
15450+
15451+ raw_spin_unlock_irq(&cpu_base->lock);
15452+ restart = fn(timer);
15453+ raw_spin_lock_irq(&cpu_base->lock);
15454+
15455+ hrtimer_rt_reprogram(restart, timer, base);
15456+ }
15457+ }
15458+
15459+ raw_spin_unlock_irq(&cpu_base->lock);
15460+
15461+ wake_up_timer_waiters(cpu_base);
15462+}
15463+
15464+static int hrtimer_rt_defer(struct hrtimer *timer)
15465+{
15466+ if (timer->irqsafe)
15467+ return 0;
15468+
15469+ __remove_hrtimer(timer, timer->base, timer->state, 0);
15470+ list_add_tail(&timer->cb_entry, &timer->base->expired);
15471+ return 1;
15472+}
15473+
15474+#else
15475+
15476+static inline void hrtimer_rt_run_pending(void)
15477+{
15478+ hrtimer_peek_ahead_timers();
15479+}
15480+
15481+static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
15482+
15483+#endif
15484+
15485 #ifdef CONFIG_HIGH_RES_TIMERS
15486
15487 /*
15488@@ -1255,7 +1444,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
15489 {
15490 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
15491 ktime_t expires_next, now, entry_time, delta;
15492- int i, retries = 0;
15493+ int i, retries = 0, raise = 0;
15494
15495 BUG_ON(!cpu_base->hres_active);
15496 cpu_base->nr_events++;
15497@@ -1290,6 +1479,15 @@ retry:
15498
15499 timer = container_of(node, struct hrtimer, node);
15500
15501+ trace_hrtimer_interrupt(raw_smp_processor_id(),
15502+ ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
15503+ timer->praecox : hrtimer_get_expires(timer),
15504+ basenow)),
15505+ current,
15506+ timer->function == hrtimer_wakeup ?
15507+ container_of(timer, struct hrtimer_sleeper,
15508+ timer)->task : NULL);
15509+
15510 /*
15511 * The immediate goal for using the softexpires is
15512 * minimizing wakeups, not running timers at the
15513@@ -1313,7 +1511,10 @@ retry:
15514 break;
15515 }
15516
15517- __run_hrtimer(timer, &basenow);
15518+ if (!hrtimer_rt_defer(timer))
15519+ __run_hrtimer(timer, &basenow);
15520+ else
15521+ raise = 1;
15522 }
15523 }
15524
15525@@ -1328,7 +1529,7 @@ retry:
15526 if (expires_next.tv64 == KTIME_MAX ||
15527 !tick_program_event(expires_next, 0)) {
15528 cpu_base->hang_detected = 0;
15529- return;
15530+ goto out;
15531 }
15532
15533 /*
15534@@ -1372,6 +1573,9 @@ retry:
15535 tick_program_event(expires_next, 1);
15536 printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
15537 ktime_to_ns(delta));
15538+out:
15539+ if (raise)
15540+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
15541 }
15542
15543 /*
15544@@ -1408,24 +1612,26 @@ void hrtimer_peek_ahead_timers(void)
15545 local_irq_restore(flags);
15546 }
15547
15548+#else /* CONFIG_HIGH_RES_TIMERS */
15549+
15550+static inline void __hrtimer_peek_ahead_timers(void) { }
15551+
15552+#endif /* !CONFIG_HIGH_RES_TIMERS */
15553+
15554 static void run_hrtimer_softirq(struct softirq_action *h)
15555 {
15556+#ifdef CONFIG_HIGH_RES_TIMERS
15557 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
15558
15559 if (cpu_base->clock_was_set) {
15560 cpu_base->clock_was_set = 0;
15561 clock_was_set();
15562 }
15563+#endif
15564
15565- hrtimer_peek_ahead_timers();
15566+ hrtimer_rt_run_pending();
15567 }
15568
15569-#else /* CONFIG_HIGH_RES_TIMERS */
15570-
15571-static inline void __hrtimer_peek_ahead_timers(void) { }
15572-
15573-#endif /* !CONFIG_HIGH_RES_TIMERS */
15574-
15575 /*
15576 * Called from timer softirq every jiffy, expire hrtimers:
15577 *
15578@@ -1458,7 +1664,7 @@ void hrtimer_run_queues(void)
15579 struct timerqueue_node *node;
15580 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
15581 struct hrtimer_clock_base *base;
15582- int index, gettime = 1;
15583+ int index, gettime = 1, raise = 0;
15584
15585 if (hrtimer_hres_active())
15586 return;
15587@@ -1483,10 +1689,16 @@ void hrtimer_run_queues(void)
15588 hrtimer_get_expires_tv64(timer))
15589 break;
15590
15591- __run_hrtimer(timer, &base->softirq_time);
15592+ if (!hrtimer_rt_defer(timer))
15593+ __run_hrtimer(timer, &base->softirq_time);
15594+ else
15595+ raise = 1;
15596 }
15597 raw_spin_unlock(&cpu_base->lock);
15598 }
15599+
15600+ if (raise)
15601+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
15602 }
15603
15604 /*
15605@@ -1508,6 +1720,7 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
15606 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
15607 {
15608 sl->timer.function = hrtimer_wakeup;
15609+ sl->timer.irqsafe = 1;
15610 sl->task = task;
15611 }
15612 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
15613@@ -1644,9 +1857,13 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
15614 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
15615 cpu_base->clock_base[i].cpu_base = cpu_base;
15616 timerqueue_init_head(&cpu_base->clock_base[i].active);
15617+ INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
15618 }
15619
15620 hrtimer_init_hres(cpu_base);
15621+#ifdef CONFIG_PREEMPT_RT_BASE
15622+ init_waitqueue_head(&cpu_base->wait);
15623+#endif
15624 }
15625
15626 #ifdef CONFIG_HOTPLUG_CPU
15627@@ -1759,9 +1976,7 @@ void __init hrtimers_init(void)
15628 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
15629 (void *)(long)smp_processor_id());
15630 register_cpu_notifier(&hrtimers_nb);
15631-#ifdef CONFIG_HIGH_RES_TIMERS
15632 open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
15633-#endif
15634 }
15635
15636 /**
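
With this change a timer whose callback is safe in hard interrupt context can opt out of the softirq deferral by setting the irqsafe flag the patch adds to struct hrtimer; anything else is queued on base->expired and run by hrtimer_rt_run_pending(). A sketch of an irqsafe user:

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>
    #include <linux/time.h>

    static struct hrtimer tick;

    static enum hrtimer_restart tick_fn(struct hrtimer *t)
    {
            /* Runs from hrtimer_interrupt(); must not take sleeping locks. */
            hrtimer_forward_now(t, ktime_set(0, 10 * NSEC_PER_MSEC));
            return HRTIMER_RESTART;
    }

    static void tick_start(void)
    {
            hrtimer_init(&tick, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            tick.function = tick_fn;
            tick.irqsafe = 1;       /* skip hrtimer_rt_defer() on PREEMPT_RT */
            hrtimer_start(&tick, ktime_set(0, 10 * NSEC_PER_MSEC), HRTIMER_MODE_REL);
    }
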
15637diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
15638index 131ca17..7f50c55 100644
15639--- a/kernel/irq/handle.c
15640+++ b/kernel/irq/handle.c
15641@@ -132,6 +132,8 @@ static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
15642 irqreturn_t
15643 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
15644 {
15645+ struct pt_regs *regs = get_irq_regs();
15646+ u64 ip = regs ? instruction_pointer(regs) : 0;
15647 irqreturn_t retval = IRQ_NONE;
15648 unsigned int flags = 0, irq = desc->irq_data.irq;
15649
15650@@ -172,7 +174,11 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
15651 action = action->next;
15652 } while (action);
15653
15654- add_interrupt_randomness(irq, flags);
15655+#ifndef CONFIG_PREEMPT_RT_FULL
15656+ add_interrupt_randomness(irq, flags, ip);
15657+#else
15658+ desc->random_ip = ip;
15659+#endif
15660
15661 if (!noirqdebug)
15662 note_interrupt(irq, desc, retval);
15663diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
15664index 192a302..473b2b6 100644
15665--- a/kernel/irq/irqdesc.c
15666+++ b/kernel/irq/irqdesc.c
15667@@ -23,10 +23,27 @@
15668 static struct lock_class_key irq_desc_lock_class;
15669
15670 #if defined(CONFIG_SMP)
15671+static int __init irq_affinity_setup(char *str)
15672+{
15673+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
15674+ cpulist_parse(str, irq_default_affinity);
15675+ /*
15676+ * Set at least the boot cpu. We don't want to end up with
15677+ * bug reports caused by random command line masks
15678+ */
15679+ cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
15680+ return 1;
15681+}
15682+__setup("irqaffinity=", irq_affinity_setup);
15683+
15684 static void __init init_irq_default_affinity(void)
15685 {
15686- alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
15687- cpumask_setall(irq_default_affinity);
15688+#ifdef CONFIG_CPUMASK_OFFSTACK
15689+ if (!irq_default_affinity)
15690+ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
15691+#endif
15692+ if (cpumask_empty(irq_default_affinity))
15693+ cpumask_setall(irq_default_affinity);
15694 }
15695 #else
15696 static void __init init_irq_default_affinity(void)
15697diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
15698index e49a288..1fba5cb 100644
15699--- a/kernel/irq/manage.c
15700+++ b/kernel/irq/manage.c
15701@@ -21,6 +21,7 @@
15702 #include "internals.h"
15703
15704 #ifdef CONFIG_IRQ_FORCED_THREADING
15705+# ifndef CONFIG_PREEMPT_RT_BASE
15706 __read_mostly bool force_irqthreads;
15707
15708 static int __init setup_forced_irqthreads(char *arg)
15709@@ -29,6 +30,7 @@ static int __init setup_forced_irqthreads(char *arg)
15710 return 0;
15711 }
15712 early_param("threadirqs", setup_forced_irqthreads);
15713+# endif
15714 #endif
15715
15716 /**
15717@@ -780,7 +782,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
15718 local_bh_disable();
15719 ret = action->thread_fn(action->irq, action->dev_id);
15720 irq_finalize_oneshot(desc, action);
15721- local_bh_enable();
15722+ /*
15723+ * Interrupts which have real time requirements can be set up
15724+ * to avoid softirq processing in the thread handler. This is
15725+ * safe as these interrupts do not raise soft interrupts.
15726+ */
15727+ if (irq_settings_no_softirq_call(desc))
15728+ _local_bh_enable();
15729+ else
15730+ local_bh_enable();
15731 return ret;
15732 }
15733
15734@@ -869,6 +879,12 @@ static int irq_thread(void *data)
15735 if (!noirqdebug)
15736 note_interrupt(action->irq, desc, action_ret);
15737
15738+#ifdef CONFIG_PREEMPT_RT_FULL
15739+ migrate_disable();
15740+ add_interrupt_randomness(action->irq, 0,
15741+ desc->random_ip ^ (unsigned long) action);
15742+ migrate_enable();
15743+#endif
15744 wake_threads_waitq(desc);
15745 }
15746
15747@@ -1125,6 +1141,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
15748 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
15749 }
15750
15751+ if (new->flags & IRQF_NO_SOFTIRQ_CALL)
15752+ irq_settings_set_no_softirq_call(desc);
15753+
15754 /* Set default affinity mask once everything is setup */
15755 setup_affinity(irq, desc, mask);
15756
15757diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
15758index 1162f10..0d2c381 100644
15759--- a/kernel/irq/settings.h
15760+++ b/kernel/irq/settings.h
15761@@ -14,6 +14,7 @@ enum {
15762 _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
15763 _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
15764 _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
15765+ _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
15766 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
15767 };
15768
15769@@ -26,6 +27,7 @@ enum {
15770 #define IRQ_NOAUTOEN GOT_YOU_MORON
15771 #define IRQ_NESTED_THREAD GOT_YOU_MORON
15772 #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
15773+#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
15774 #undef IRQF_MODIFY_MASK
15775 #define IRQF_MODIFY_MASK GOT_YOU_MORON
15776
15777@@ -36,6 +38,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
15778 desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
15779 }
15780
15781+static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
15782+{
15783+ return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
15784+}
15785+
15786+static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
15787+{
15788+ desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
15789+}
15790+
15791 static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
15792 {
15793 return desc->status_use_accessors & _IRQ_PER_CPU;
15794diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
15795index 7b5f012..38a32b0 100644
15796--- a/kernel/irq/spurious.c
15797+++ b/kernel/irq/spurious.c
15798@@ -340,6 +340,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
15799
15800 static int __init irqfixup_setup(char *str)
15801 {
15802+#ifdef CONFIG_PREEMPT_RT_BASE
15803+ printk(KERN_WARNING "irqfixup boot option not supported "
15804+ "w/ CONFIG_PREEMPT_RT_BASE\n");
15805+ return 1;
15806+#endif
15807 irqfixup = 1;
15808 printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
15809 printk(KERN_WARNING "This may impact system performance.\n");
15810@@ -352,6 +357,11 @@ module_param(irqfixup, int, 0644);
15811
15812 static int __init irqpoll_setup(char *str)
15813 {
15814+#ifdef CONFIG_PREEMPT_RT_BASE
15815+ printk(KERN_WARNING "irqpoll boot option not supported "
15816+ "w/ CONFIG_PREEMPT_RT_BASE\n");
15817+ return 1;
15818+#endif
15819 irqfixup = 2;
15820 printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
15821 "enabled\n");
15822diff --git a/kernel/irq_work.c b/kernel/irq_work.c
15823index 1588e3b..170c2ea 100644
15824--- a/kernel/irq_work.c
15825+++ b/kernel/irq_work.c
15826@@ -107,8 +107,10 @@ void irq_work_run(void)
15827 if (llist_empty(this_list))
15828 return;
15829
15830+#ifndef CONFIG_PREEMPT_RT_FULL
15831 BUG_ON(!in_irq());
15832 BUG_ON(!irqs_disabled());
15833+#endif
15834
15835 llnode = llist_del_all(this_list);
15836 while (llnode != NULL) {
15837diff --git a/kernel/itimer.c b/kernel/itimer.c
15838index 8d262b4..d051390 100644
15839--- a/kernel/itimer.c
15840+++ b/kernel/itimer.c
15841@@ -213,6 +213,7 @@ again:
15842 /* We are sharing ->siglock with it_real_fn() */
15843 if (hrtimer_try_to_cancel(timer) < 0) {
15844 spin_unlock_irq(&tsk->sighand->siglock);
15845+ hrtimer_wait_for_timer(&tsk->signal->real_timer);
15846 goto again;
15847 }
15848 expires = timeval_to_ktime(value->it_value);
15849diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
15850index 6ada93c..1c991e3 100644
15851--- a/kernel/ksysfs.c
15852+++ b/kernel/ksysfs.c
15853@@ -132,6 +132,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
15854
15855 #endif /* CONFIG_KEXEC */
15856
15857+#if defined(CONFIG_PREEMPT_RT_FULL)
15858+static ssize_t realtime_show(struct kobject *kobj,
15859+ struct kobj_attribute *attr, char *buf)
15860+{
15861+ return sprintf(buf, "%d\n", 1);
15862+}
15863+KERNEL_ATTR_RO(realtime);
15864+#endif
15865+
15866 /* whether file capabilities are enabled */
15867 static ssize_t fscaps_show(struct kobject *kobj,
15868 struct kobj_attribute *attr, char *buf)
15869@@ -197,6 +206,9 @@ static struct attribute * kernel_attrs[] = {
15870 &vmcoreinfo_attr.attr,
15871 #endif
15872 &rcu_expedited_attr.attr,
15873+#ifdef CONFIG_PREEMPT_RT_FULL
15874+ &realtime_attr.attr,
15875+#endif
15876 NULL
15877 };
15878
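The new attribute appears as /sys/kernel/realtime and reads "1" on a PREEMPT_RT_FULL kernel; on any other kernel the file simply does not exist. A small user-space probe, kept as a hedged sketch with error handling trimmed:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (f) {
		/* Attribute prints a single integer ("1") when present. */
		if (fscanf(f, "%d", &rt) != 1)
			rt = 0;
		fclose(f);
	}
	printf("PREEMPT_RT_FULL kernel: %s\n", rt ? "yes" : "no");
	return 0;
}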
15879diff --git a/kernel/lglock.c b/kernel/lglock.c
15880index 6535a66..0bbf5d1 100644
15881--- a/kernel/lglock.c
15882+++ b/kernel/lglock.c
15883@@ -4,6 +4,15 @@
15884 #include <linux/cpu.h>
15885 #include <linux/string.h>
15886
15887+#ifndef CONFIG_PREEMPT_RT_FULL
15888+# define lg_lock_ptr arch_spinlock_t
15889+# define lg_do_lock(l) arch_spin_lock(l)
15890+# define lg_do_unlock(l) arch_spin_unlock(l)
15891+#else
15892+# define lg_lock_ptr struct rt_mutex
15893+# define lg_do_lock(l) __rt_spin_lock(l)
15894+# define lg_do_unlock(l) __rt_spin_unlock(l)
15895+#endif
15896 /*
15897 * Note there is no uninit, so lglocks cannot be defined in
15898 * modules (but it's fine to use them from there)
15899@@ -12,51 +21,60 @@
15900
15901 void lg_lock_init(struct lglock *lg, char *name)
15902 {
15903+#ifdef CONFIG_PREEMPT_RT_FULL
15904+ int i;
15905+
15906+ for_each_possible_cpu(i) {
15907+ struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
15908+
15909+ rt_mutex_init(lock);
15910+ }
15911+#endif
15912 LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
15913 }
15914 EXPORT_SYMBOL(lg_lock_init);
15915
15916 void lg_local_lock(struct lglock *lg)
15917 {
15918- arch_spinlock_t *lock;
15919+ lg_lock_ptr *lock;
15920
15921- preempt_disable();
15922+ migrate_disable();
15923 rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
15924 lock = this_cpu_ptr(lg->lock);
15925- arch_spin_lock(lock);
15926+ lg_do_lock(lock);
15927 }
15928 EXPORT_SYMBOL(lg_local_lock);
15929
15930 void lg_local_unlock(struct lglock *lg)
15931 {
15932- arch_spinlock_t *lock;
15933+ lg_lock_ptr *lock;
15934
15935 rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
15936 lock = this_cpu_ptr(lg->lock);
15937- arch_spin_unlock(lock);
15938- preempt_enable();
15939+ lg_do_unlock(lock);
15940+ migrate_enable();
15941 }
15942 EXPORT_SYMBOL(lg_local_unlock);
15943
15944 void lg_local_lock_cpu(struct lglock *lg, int cpu)
15945 {
15946- arch_spinlock_t *lock;
15947+ lg_lock_ptr *lock;
15948
15949- preempt_disable();
15950+ preempt_disable_nort();
15951 rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
15952 lock = per_cpu_ptr(lg->lock, cpu);
15953- arch_spin_lock(lock);
15954+ lg_do_lock(lock);
15955 }
15956 EXPORT_SYMBOL(lg_local_lock_cpu);
15957
15958 void lg_local_unlock_cpu(struct lglock *lg, int cpu)
15959 {
15960- arch_spinlock_t *lock;
15961+ lg_lock_ptr *lock;
15962
15963 rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
15964 lock = per_cpu_ptr(lg->lock, cpu);
15965- arch_spin_unlock(lock);
15966- preempt_enable();
15967+ lg_do_unlock(lock);
15968+ preempt_enable_nort();
15969 }
15970 EXPORT_SYMBOL(lg_local_unlock_cpu);
15971
15972@@ -64,12 +82,12 @@ void lg_global_lock(struct lglock *lg)
15973 {
15974 int i;
15975
15976- preempt_disable();
15977+ preempt_disable_nort();
15978 rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_);
15979 for_each_possible_cpu(i) {
15980- arch_spinlock_t *lock;
15981+ lg_lock_ptr *lock;
15982 lock = per_cpu_ptr(lg->lock, i);
15983- arch_spin_lock(lock);
15984+ lg_do_lock(lock);
15985 }
15986 }
15987 EXPORT_SYMBOL(lg_global_lock);
15988@@ -80,10 +98,10 @@ void lg_global_unlock(struct lglock *lg)
15989
15990 rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
15991 for_each_possible_cpu(i) {
15992- arch_spinlock_t *lock;
15993+ lg_lock_ptr *lock;
15994 lock = per_cpu_ptr(lg->lock, i);
15995- arch_spin_unlock(lock);
15996+ lg_do_unlock(lock);
15997 }
15998- preempt_enable();
15999+ preempt_enable_nort();
16000 }
16001 EXPORT_SYMBOL(lg_global_unlock);
16002diff --git a/kernel/lockdep.c b/kernel/lockdep.c
16003index 7981e5b..7f03801 100644
16004--- a/kernel/lockdep.c
16005+++ b/kernel/lockdep.c
16006@@ -3534,6 +3534,7 @@ static void check_flags(unsigned long flags)
16007 }
16008 }
16009
16010+#ifndef CONFIG_PREEMPT_RT_FULL
16011 /*
16012 * We dont accurately track softirq state in e.g.
16013 * hardirq contexts (such as on 4KSTACKS), so only
16014@@ -3548,6 +3549,7 @@ static void check_flags(unsigned long flags)
16015 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
16016 }
16017 }
16018+#endif
16019
16020 if (!debug_locks)
16021 print_irqtrace_events(current);
16022diff --git a/kernel/panic.c b/kernel/panic.c
16023index e1b2822..5dc4381 100644
16024--- a/kernel/panic.c
16025+++ b/kernel/panic.c
16026@@ -371,9 +371,11 @@ static u64 oops_id;
16027
16028 static int init_oops_id(void)
16029 {
16030+#ifndef CONFIG_PREEMPT_RT_FULL
16031 if (!oops_id)
16032 get_random_bytes(&oops_id, sizeof(oops_id));
16033 else
16034+#endif
16035 oops_id++;
16036
16037 return 0;
16038diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
16039index 942ca27..06692e8 100644
16040--- a/kernel/posix-cpu-timers.c
16041+++ b/kernel/posix-cpu-timers.c
16042@@ -661,7 +661,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
16043 /*
16044 * Disarm any old timer after extracting its expiry time.
16045 */
16046- BUG_ON(!irqs_disabled());
16047+ BUG_ON_NONRT(!irqs_disabled());
16048
16049 ret = 0;
16050 old_incr = timer->it.cpu.incr;
16051@@ -1177,7 +1177,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
16052 /*
16053 * Now re-arm for the new expiry time.
16054 */
16055- BUG_ON(!irqs_disabled());
16056+ BUG_ON_NONRT(!irqs_disabled());
16057 arm_timer(timer);
16058 spin_unlock(&p->sighand->siglock);
16059
16060@@ -1241,10 +1241,11 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
16061 sig = tsk->signal;
16062 if (sig->cputimer.running) {
16063 struct task_cputime group_sample;
16064+ unsigned long flags;
16065
16066- raw_spin_lock(&sig->cputimer.lock);
16067+ raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
16068 group_sample = sig->cputimer.cputime;
16069- raw_spin_unlock(&sig->cputimer.lock);
16070+ raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
16071
16072 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
16073 return 1;
16074@@ -1258,13 +1259,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
16075 * already updated our counts. We need to check if any timers fire now.
16076 * Interrupts are disabled.
16077 */
16078-void run_posix_cpu_timers(struct task_struct *tsk)
16079+static void __run_posix_cpu_timers(struct task_struct *tsk)
16080 {
16081 LIST_HEAD(firing);
16082 struct k_itimer *timer, *next;
16083 unsigned long flags;
16084
16085- BUG_ON(!irqs_disabled());
16086+ BUG_ON_NONRT(!irqs_disabled());
16087
16088 /*
16089 * The fast path checks that there are no expired thread or thread
16090@@ -1322,6 +1323,190 @@ void run_posix_cpu_timers(struct task_struct *tsk)
16091 }
16092 }
16093
16094+#ifdef CONFIG_PREEMPT_RT_BASE
16095+#include <linux/kthread.h>
16096+#include <linux/cpu.h>
16097+DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
16098+DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
16099+
16100+static int posix_cpu_timers_thread(void *data)
16101+{
16102+ int cpu = (long)data;
16103+
16104+ BUG_ON(per_cpu(posix_timer_task,cpu) != current);
16105+
16106+ while (!kthread_should_stop()) {
16107+ struct task_struct *tsk = NULL;
16108+ struct task_struct *next = NULL;
16109+
16110+ if (cpu_is_offline(cpu))
16111+ goto wait_to_die;
16112+
16113+ /* grab task list */
16114+ raw_local_irq_disable();
16115+ tsk = per_cpu(posix_timer_tasklist, cpu);
16116+ per_cpu(posix_timer_tasklist, cpu) = NULL;
16117+ raw_local_irq_enable();
16118+
16119+ /* it's possible the list is empty, just return */
16120+ if (!tsk) {
16121+ set_current_state(TASK_INTERRUPTIBLE);
16122+ schedule();
16123+ __set_current_state(TASK_RUNNING);
16124+ continue;
16125+ }
16126+
16127+ /* Process task list */
16128+ while (1) {
16129+ /* save next */
16130+ next = tsk->posix_timer_list;
16131+
16132+ /* run the task timers, clear its ptr and
16133+ * unreference it
16134+ */
16135+ __run_posix_cpu_timers(tsk);
16136+ tsk->posix_timer_list = NULL;
16137+ put_task_struct(tsk);
16138+
16139+ /* check if this is the last on the list */
16140+ if (next == tsk)
16141+ break;
16142+ tsk = next;
16143+ }
16144+ }
16145+ return 0;
16146+
16147+wait_to_die:
16148+ /* Wait for kthread_stop */
16149+ set_current_state(TASK_INTERRUPTIBLE);
16150+ while (!kthread_should_stop()) {
16151+ schedule();
16152+ set_current_state(TASK_INTERRUPTIBLE);
16153+ }
16154+ __set_current_state(TASK_RUNNING);
16155+ return 0;
16156+}
16157+
16158+static inline int __fastpath_timer_check(struct task_struct *tsk)
16159+{
16160+ /* tsk == current, ensure it is safe to use ->signal/sighand */
16161+ if (unlikely(tsk->exit_state))
16162+ return 0;
16163+
16164+ if (!task_cputime_zero(&tsk->cputime_expires))
16165+ return 1;
16166+
16167+ if (!task_cputime_zero(&tsk->signal->cputime_expires))
16168+ return 1;
16169+
16170+ return 0;
16171+}
16172+
16173+void run_posix_cpu_timers(struct task_struct *tsk)
16174+{
16175+ unsigned long cpu = smp_processor_id();
16176+ struct task_struct *tasklist;
16177+
16178+ BUG_ON(!irqs_disabled());
16179+ if(!per_cpu(posix_timer_task, cpu))
16180+ return;
16181+ /* get per-cpu references */
16182+ tasklist = per_cpu(posix_timer_tasklist, cpu);
16183+
16184+ /* check to see if we're already queued */
16185+ if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
16186+ get_task_struct(tsk);
16187+ if (tasklist) {
16188+ tsk->posix_timer_list = tasklist;
16189+ } else {
16190+ /*
16191+ * The list is terminated by a self-pointing
16192+ * task_struct
16193+ */
16194+ tsk->posix_timer_list = tsk;
16195+ }
16196+ per_cpu(posix_timer_tasklist, cpu) = tsk;
16197+
16198+ wake_up_process(per_cpu(posix_timer_task, cpu));
16199+ }
16200+}
16201+
16202+/*
16203+ * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
16204+ * Here we can start up the necessary per-cpu posix timer thread for the new CPU.
16205+ */
16206+static int posix_cpu_thread_call(struct notifier_block *nfb,
16207+ unsigned long action, void *hcpu)
16208+{
16209+ int cpu = (long)hcpu;
16210+ struct task_struct *p;
16211+ struct sched_param param;
16212+
16213+ switch (action) {
16214+ case CPU_UP_PREPARE:
16215+ p = kthread_create(posix_cpu_timers_thread, hcpu,
16216+ "posixcputmr/%d",cpu);
16217+ if (IS_ERR(p))
16218+ return NOTIFY_BAD;
16219+ p->flags |= PF_NOFREEZE;
16220+ kthread_bind(p, cpu);
16221+ /* Must be high prio to avoid getting starved */
16222+ param.sched_priority = MAX_RT_PRIO-1;
16223+ sched_setscheduler(p, SCHED_FIFO, &param);
16224+ per_cpu(posix_timer_task,cpu) = p;
16225+ break;
16226+ case CPU_ONLINE:
16227+ /* Strictly unnecessary, as the first user will wake it. */
16228+ wake_up_process(per_cpu(posix_timer_task,cpu));
16229+ break;
16230+#ifdef CONFIG_HOTPLUG_CPU
16231+ case CPU_UP_CANCELED:
16232+ /* Unbind it from offline cpu so it can run. Fall thru. */
16233+ kthread_bind(per_cpu(posix_timer_task, cpu),
16234+ cpumask_any(cpu_online_mask));
16235+ kthread_stop(per_cpu(posix_timer_task,cpu));
16236+ per_cpu(posix_timer_task,cpu) = NULL;
16237+ break;
16238+ case CPU_DEAD:
16239+ kthread_stop(per_cpu(posix_timer_task,cpu));
16240+ per_cpu(posix_timer_task,cpu) = NULL;
16241+ break;
16242+#endif
16243+ }
16244+ return NOTIFY_OK;
16245+}
16246+
16247+/* Register at highest priority so that task migration (migrate_all_tasks)
16248+ * happens before everything else.
16249+ */
16250+static struct notifier_block posix_cpu_thread_notifier = {
16251+ .notifier_call = posix_cpu_thread_call,
16252+ .priority = 10
16253+};
16254+
16255+static int __init posix_cpu_thread_init(void)
16256+{
16257+ void *hcpu = (void *)(long)smp_processor_id();
16258+ /* Start one for boot CPU. */
16259+ unsigned long cpu;
16260+
16261+ /* init the per-cpu posix_timer_tasklets */
16262+ for_each_possible_cpu(cpu)
16263+ per_cpu(posix_timer_tasklist, cpu) = NULL;
16264+
16265+ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
16266+ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
16267+ register_cpu_notifier(&posix_cpu_thread_notifier);
16268+ return 0;
16269+}
16270+early_initcall(posix_cpu_thread_init);
16271+#else /* CONFIG_PREEMPT_RT_BASE */
16272+void run_posix_cpu_timers(struct task_struct *tsk)
16273+{
16274+ __run_posix_cpu_timers(tsk);
16275+}
16276+#endif /* CONFIG_PREEMPT_RT_BASE */
16277+
16278 /*
16279 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
16280 * The tsk->sighand->siglock must be held by the caller.
16281diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
16282index e885be1..1d5e435 100644
16283--- a/kernel/posix-timers.c
16284+++ b/kernel/posix-timers.c
16285@@ -439,6 +439,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
16286 static struct pid *good_sigevent(sigevent_t * event)
16287 {
16288 struct task_struct *rtn = current->group_leader;
16289+ int sig = event->sigev_signo;
16290
16291 if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
16292 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
16293@@ -447,7 +448,8 @@ static struct pid *good_sigevent(sigevent_t * event)
16294 return NULL;
16295
16296 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
16297- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
16298+ (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
16299+ sig_kernel_coredump(sig)))
16300 return NULL;
16301
16302 return task_pid(rtn);
16303@@ -771,6 +773,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
16304 return overrun;
16305 }
16306
16307+/*
16308+ * Protected by RCU!
16309+ */
16310+static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
16311+{
16312+#ifdef CONFIG_PREEMPT_RT_FULL
16313+ if (kc->timer_set == common_timer_set)
16314+ hrtimer_wait_for_timer(&timr->it.real.timer);
16315+ else
16316+ /* FIXME: Whacky hack for posix-cpu-timers */
16317+ schedule_timeout(1);
16318+#endif
16319+}
16320+
16321 /* Set a POSIX.1b interval timer. */
16322 /* timr->it_lock is taken. */
16323 static int
16324@@ -848,6 +864,7 @@ retry:
16325 if (!timr)
16326 return -EINVAL;
16327
16328+ rcu_read_lock();
16329 kc = clockid_to_kclock(timr->it_clock);
16330 if (WARN_ON_ONCE(!kc || !kc->timer_set))
16331 error = -EINVAL;
16332@@ -856,9 +873,12 @@ retry:
16333
16334 unlock_timer(timr, flag);
16335 if (error == TIMER_RETRY) {
16336+ timer_wait_for_callback(kc, timr);
16337 rtn = NULL; // We already got the old time...
16338+ rcu_read_unlock();
16339 goto retry;
16340 }
16341+ rcu_read_unlock();
16342
16343 if (old_setting && !error &&
16344 copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
16345@@ -896,10 +916,15 @@ retry_delete:
16346 if (!timer)
16347 return -EINVAL;
16348
16349+ rcu_read_lock();
16350 if (timer_delete_hook(timer) == TIMER_RETRY) {
16351 unlock_timer(timer, flags);
16352+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
16353+ timer);
16354+ rcu_read_unlock();
16355 goto retry_delete;
16356 }
16357+ rcu_read_unlock();
16358
16359 spin_lock(&current->sighand->siglock);
16360 list_del(&timer->list);
16361@@ -925,8 +950,18 @@ static void itimer_delete(struct k_itimer *timer)
16362 retry_delete:
16363 spin_lock_irqsave(&timer->it_lock, flags);
16364
16365+ /* On RT we can race with a deletion */
16366+ if (!timer->it_signal) {
16367+ unlock_timer(timer, flags);
16368+ return;
16369+ }
16370+
16371 if (timer_delete_hook(timer) == TIMER_RETRY) {
16372+ rcu_read_lock();
16373 unlock_timer(timer, flags);
16374+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
16375+ timer);
16376+ rcu_read_unlock();
16377 goto retry_delete;
16378 }
16379 list_del(&timer->list);
16380diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
16381index b26f5f1..3321e2b 100644
16382--- a/kernel/power/hibernate.c
16383+++ b/kernel/power/hibernate.c
16384@@ -275,6 +275,8 @@ static int create_image(int platform_mode)
16385
16386 local_irq_disable();
16387
16388+ system_state = SYSTEM_SUSPEND;
16389+
16390 error = syscore_suspend();
16391 if (error) {
16392 printk(KERN_ERR "PM: Some system devices failed to power down, "
16393@@ -302,6 +304,7 @@ static int create_image(int platform_mode)
16394 syscore_resume();
16395
16396 Enable_irqs:
16397+ system_state = SYSTEM_RUNNING;
16398 local_irq_enable();
16399
16400 Enable_cpus:
16401@@ -427,6 +430,7 @@ static int resume_target_kernel(bool platform_mode)
16402 goto Enable_cpus;
16403
16404 local_irq_disable();
16405+ system_state = SYSTEM_SUSPEND;
16406
16407 error = syscore_suspend();
16408 if (error)
16409@@ -460,6 +464,7 @@ static int resume_target_kernel(bool platform_mode)
16410 syscore_resume();
16411
16412 Enable_irqs:
16413+ system_state = SYSTEM_RUNNING;
16414 local_irq_enable();
16415
16416 Enable_cpus:
16417@@ -542,6 +547,7 @@ int hibernation_platform_enter(void)
16418 goto Platform_finish;
16419
16420 local_irq_disable();
16421+ system_state = SYSTEM_SUSPEND;
16422 syscore_suspend();
16423 if (pm_wakeup_pending()) {
16424 error = -EAGAIN;
16425@@ -554,6 +560,7 @@ int hibernation_platform_enter(void)
16426
16427 Power_up:
16428 syscore_resume();
16429+ system_state = SYSTEM_RUNNING;
16430 local_irq_enable();
16431 enable_nonboot_cpus();
16432
16433diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
16434index c8b7446..ff2dade 100644
16435--- a/kernel/power/suspend.c
16436+++ b/kernel/power/suspend.c
16437@@ -165,6 +165,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
16438 arch_suspend_disable_irqs();
16439 BUG_ON(!irqs_disabled());
16440
16441+ system_state = SYSTEM_SUSPEND;
16442+
16443 error = syscore_suspend();
16444 if (!error) {
16445 *wakeup = pm_wakeup_pending();
16446@@ -175,6 +177,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
16447 syscore_resume();
16448 }
16449
16450+ system_state = SYSTEM_RUNNING;
16451+
16452 arch_suspend_enable_irqs();
16453 BUG_ON(irqs_disabled());
16454
16455diff --git a/kernel/printk.c b/kernel/printk.c
16456index 267ce78..6d52c34 100644
16457--- a/kernel/printk.c
16458+++ b/kernel/printk.c
16459@@ -48,13 +48,6 @@
16460 #define CREATE_TRACE_POINTS
16461 #include <trace/events/printk.h>
16462
16463-/*
16464- * Architectures can override it:
16465- */
16466-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
16467-{
16468-}
16469-
16470 /* printk's without a loglevel use this.. */
16471 #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
16472
16473@@ -756,6 +749,62 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
16474 MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
16475 "print all kernel messages to the console.");
16476
16477+#ifdef CONFIG_EARLY_PRINTK
16478+struct console *early_console;
16479+
16480+void early_vprintk(const char *fmt, va_list ap)
16481+{
16482+ if (early_console) {
16483+ char buf[512];
16484+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
16485+
16486+ early_console->write(early_console, buf, n);
16487+ }
16488+}
16489+
16490+asmlinkage void early_printk(const char *fmt, ...)
16491+{
16492+ va_list ap;
16493+
16494+ va_start(ap, fmt);
16495+ early_vprintk(fmt, ap);
16496+ va_end(ap);
16497+}
16498+
16499+/*
16500+ * This is independent of any log levels - a global
16501+ * kill switch that turns off all of printk.
16502+ *
16503+ * Used by the NMI watchdog if early-printk is enabled.
16504+ */
16505+static bool __read_mostly printk_killswitch;
16506+
16507+static int __init force_early_printk_setup(char *str)
16508+{
16509+ printk_killswitch = true;
16510+ return 0;
16511+}
16512+early_param("force_early_printk", force_early_printk_setup);
16513+
16514+void printk_kill(void)
16515+{
16516+ printk_killswitch = true;
16517+}
16518+
16519+static int forced_early_printk(const char *fmt, va_list ap)
16520+{
16521+ if (!printk_killswitch)
16522+ return 0;
16523+ early_vprintk(fmt, ap);
16524+ return 1;
16525+}
16526+#else
16527+static inline int forced_early_printk(const char *fmt, va_list ap)
16528+{
16529+ return 0;
16530+}
16531+#endif
16532+
16533 #ifdef CONFIG_BOOT_PRINTK_DELAY
16534
16535 static int boot_delay; /* msecs delay after each printk during bootup */
16536@@ -1023,6 +1072,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
16537 {
16538 char *text;
16539 int len = 0;
16540+ int attempts = 0;
16541
16542 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
16543 if (!text)
16544@@ -1034,7 +1084,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
16545 u64 seq;
16546 u32 idx;
16547 enum log_flags prev;
16548-
16549+ int num_msg;
16550+try_again:
16551+ attempts++;
16552+ if (attempts > 10) {
16553+ len = -EBUSY;
16554+ goto out;
16555+ }
16556+ num_msg = 0;
16557 if (clear_seq < log_first_seq) {
16558 /* messages are gone, move to first available one */
16559 clear_seq = log_first_seq;
16560@@ -1055,6 +1112,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
16561 prev = msg->flags;
16562 idx = log_next(idx);
16563 seq++;
16564+ num_msg++;
16565+ if (num_msg > 5) {
16566+ num_msg = 0;
16567+ raw_spin_unlock_irq(&logbuf_lock);
16568+ raw_spin_lock_irq(&logbuf_lock);
16569+ if (clear_seq < log_first_seq)
16570+ goto try_again;
16571+ }
16572 }
16573
16574 /* move first record forward until length fits into the buffer */
16575@@ -1068,6 +1133,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
16576 prev = msg->flags;
16577 idx = log_next(idx);
16578 seq++;
16579+ num_msg++;
16580+ if (num_msg > 5) {
16581+ num_msg = 0;
16582+ raw_spin_unlock_irq(&logbuf_lock);
16583+ raw_spin_lock_irq(&logbuf_lock);
16584+ if (clear_seq < log_first_seq)
16585+ goto try_again;
16586+ }
16587 }
16588
16589 /* last message fitting into this dump */
16590@@ -1109,6 +1182,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
16591 clear_seq = log_next_seq;
16592 clear_idx = log_next_idx;
16593 }
16594+out:
16595 raw_spin_unlock_irq(&logbuf_lock);
16596
16597 kfree(text);
16598@@ -1266,6 +1340,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
16599 if (!console_drivers)
16600 return;
16601
16602+ migrate_disable();
16603 for_each_console(con) {
16604 if (exclusive_console && con != exclusive_console)
16605 continue;
16606@@ -1278,6 +1353,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
16607 continue;
16608 con->write(con, text, len);
16609 }
16610+ migrate_enable();
16611 }
16612
16613 /*
16614@@ -1337,12 +1413,18 @@ static inline int can_use_console(unsigned int cpu)
16615 * interrupts disabled. It should return with 'lockbuf_lock'
16616 * released but interrupts still disabled.
16617 */
16618-static int console_trylock_for_printk(unsigned int cpu)
16619+static int console_trylock_for_printk(unsigned int cpu, unsigned long flags)
16620 __releases(&logbuf_lock)
16621 {
16622 int retval = 0, wake = 0;
16623+#ifdef CONFIG_PREEMPT_RT_FULL
16624+ int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
16625+ (preempt_count() <= 1);
16626+#else
16627+ int lock = 1;
16628+#endif
16629
16630- if (console_trylock()) {
16631+ if (lock && console_trylock()) {
16632 retval = 1;
16633
16634 /*
16635@@ -1358,9 +1440,9 @@ static int console_trylock_for_printk(unsigned int cpu)
16636 }
16637 }
16638 logbuf_cpu = UINT_MAX;
16639+ raw_spin_unlock(&logbuf_lock);
16640 if (wake)
16641 up(&console_sem);
16642- raw_spin_unlock(&logbuf_lock);
16643 return retval;
16644 }
16645
16646@@ -1495,6 +1577,13 @@ asmlinkage int vprintk_emit(int facility, int level,
16647 int this_cpu;
16648 int printed_len = 0;
16649
16650+ /*
16651+ * Fall back to early_printk if a debugging subsystem has
16652+ * killed printk output
16653+ */
16654+ if (unlikely(forced_early_printk(fmt, args)))
16655+ return 1;
16656+
16657 boot_delay_msec(level);
16658 printk_delay();
16659
16660@@ -1614,8 +1703,15 @@ asmlinkage int vprintk_emit(int facility, int level,
16661 * The console_trylock_for_printk() function will release 'logbuf_lock'
16662 * regardless of whether it actually gets the console semaphore or not.
16663 */
16664- if (console_trylock_for_printk(this_cpu))
16665+ if (console_trylock_for_printk(this_cpu, flags)) {
16666+#ifndef CONFIG_PREEMPT_RT_FULL
16667+ console_unlock();
16668+#else
16669+ raw_local_irq_restore(flags);
16670 console_unlock();
16671+ raw_local_irq_save(flags);
16672+#endif
16673+ }
16674
16675 lockdep_on();
16676 out_restore_irqs:
16677@@ -1974,8 +2070,8 @@ void printk_tick(void)
16678
16679 int printk_needs_cpu(int cpu)
16680 {
16681- if (cpu_is_offline(cpu))
16682- printk_tick();
16683+ if (unlikely(cpu_is_offline(cpu)))
16684+ __this_cpu_write(printk_pending, 0);
16685 return __this_cpu_read(printk_pending);
16686 }
16687
16688@@ -2004,11 +2100,16 @@ static void console_cont_flush(char *text, size_t size)
16689 goto out;
16690
16691 len = cont_print_text(text, size);
16692+#ifndef CONFIG_PREEMPT_RT_FULL
16693 raw_spin_unlock(&logbuf_lock);
16694 stop_critical_timings();
16695 call_console_drivers(cont.level, text, len);
16696 start_critical_timings();
16697 local_irq_restore(flags);
16698+#else
16699+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
16700+ call_console_drivers(cont.level, text, len);
16701+#endif
16702 return;
16703 out:
16704 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
16705@@ -2091,12 +2192,17 @@ skip:
16706 console_idx = log_next(console_idx);
16707 console_seq++;
16708 console_prev = msg->flags;
16709- raw_spin_unlock(&logbuf_lock);
16710
16711+#ifndef CONFIG_PREEMPT_RT_FULL
16712+ raw_spin_unlock(&logbuf_lock);
16713 stop_critical_timings(); /* don't trace print latency */
16714 call_console_drivers(level, text, len);
16715 start_critical_timings();
16716 local_irq_restore(flags);
16717+#else
16718+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
16719+ call_console_drivers(level, text, len);
16720+#endif
16721 }
16722 console_locked = 0;
16723
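Taken together, the printk changes avoid driving console output with interrupts hard-disabled on RT and add a debugging escape hatch: if an early console is registered, booting with "force_early_printk" (or calling printk_kill() from a debugging facility) makes vprintk_emit() hand everything straight to early_vprintk(), bypassing the log buffer and console locking. The early-console setup itself (for example an earlyprintk= option) is architecture-specific and outside these hunks.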
16724diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
16725index a2cf761..8906a79 100644
16726--- a/kernel/rcupdate.c
16727+++ b/kernel/rcupdate.c
16728@@ -156,6 +156,7 @@ int debug_lockdep_rcu_enabled(void)
16729 }
16730 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
16731
16732+#ifndef CONFIG_PREEMPT_RT_FULL
16733 /**
16734 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
16735 *
16736@@ -182,6 +183,7 @@ int rcu_read_lock_bh_held(void)
16737 return in_softirq() || irqs_disabled();
16738 }
16739 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
16740+#endif
16741
16742 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
16743
16744diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
16745index e7dce58..9bc2fe6 100644
16746--- a/kernel/rcutiny.c
16747+++ b/kernel/rcutiny.c
16748@@ -371,6 +371,7 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
16749 }
16750 EXPORT_SYMBOL_GPL(call_rcu_sched);
16751
16752+#ifndef CONFIG_PREEMPT_RT_FULL
16753 /*
16754 * Post an RCU bottom-half callback to be invoked after any subsequent
16755 * quiescent state.
16756@@ -380,3 +381,4 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
16757 __call_rcu(head, func, &rcu_bh_ctrlblk);
16758 }
16759 EXPORT_SYMBOL_GPL(call_rcu_bh);
16760+#endif
16761diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
16762index f85016a..14cae12 100644
16763--- a/kernel/rcutiny_plugin.h
16764+++ b/kernel/rcutiny_plugin.h
16765@@ -26,6 +26,7 @@
16766 #include <linux/module.h>
16767 #include <linux/debugfs.h>
16768 #include <linux/seq_file.h>
16769+#include <linux/wait-simple.h>
16770
16771 /* Global control variables for rcupdate callback mechanism. */
16772 struct rcu_ctrlblk {
16773@@ -260,7 +261,7 @@ static void show_tiny_preempt_stats(struct seq_file *m)
16774
16775 /* Controls for rcu_kthread() kthread. */
16776 static struct task_struct *rcu_kthread_task;
16777-static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
16778+static DEFINE_SWAIT_HEAD(rcu_kthread_wq);
16779 static unsigned long have_rcu_kthread_work;
16780
16781 /*
16782@@ -560,7 +561,7 @@ void rcu_read_unlock_special(struct task_struct *t)
16783 rcu_preempt_cpu_qs();
16784
16785 /* Hardware IRQ handlers cannot block. */
16786- if (in_irq() || in_serving_softirq()) {
16787+ if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
16788 local_irq_restore(flags);
16789 return;
16790 }
16791@@ -713,7 +714,7 @@ void synchronize_rcu(void)
16792 }
16793 EXPORT_SYMBOL_GPL(synchronize_rcu);
16794
16795-static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
16796+static DEFINE_SWAIT_HEAD(sync_rcu_preempt_exp_wq);
16797 static unsigned long sync_rcu_preempt_exp_count;
16798 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
16799
16800@@ -735,7 +736,7 @@ static int rcu_preempted_readers_exp(void)
16801 */
16802 static void rcu_report_exp_done(void)
16803 {
16804- wake_up(&sync_rcu_preempt_exp_wq);
16805+ swait_wake(&sync_rcu_preempt_exp_wq);
16806 }
16807
16808 /*
16809@@ -787,8 +788,8 @@ void synchronize_rcu_expedited(void)
16810 } else {
16811 rcu_initiate_boost();
16812 local_irq_restore(flags);
16813- wait_event(sync_rcu_preempt_exp_wq,
16814- !rcu_preempted_readers_exp());
16815+ swait_event(sync_rcu_preempt_exp_wq,
16816+ !rcu_preempted_readers_exp());
16817 }
16818
16819 /* Clean up and exit. */
16820@@ -858,7 +859,7 @@ static void invoke_rcu_callbacks(void)
16821 {
16822 have_rcu_kthread_work = 1;
16823 if (rcu_kthread_task != NULL)
16824- wake_up(&rcu_kthread_wq);
16825+ swait_wake(&rcu_kthread_wq);
16826 }
16827
16828 #ifdef CONFIG_RCU_TRACE
16829@@ -888,8 +889,8 @@ static int rcu_kthread(void *arg)
16830 unsigned long flags;
16831
16832 for (;;) {
16833- wait_event_interruptible(rcu_kthread_wq,
16834- have_rcu_kthread_work != 0);
16835+ swait_event_interruptible(rcu_kthread_wq,
16836+ have_rcu_kthread_work != 0);
16837 morework = rcu_boost();
16838 local_irq_save(flags);
16839 work = have_rcu_kthread_work;
16840diff --git a/kernel/rcutree.c b/kernel/rcutree.c
16841index e441b77..7ec834d 100644
16842--- a/kernel/rcutree.c
16843+++ b/kernel/rcutree.c
16844@@ -181,6 +181,14 @@ void rcu_sched_qs(int cpu)
16845 rdp->passed_quiesce = 1;
16846 }
16847
16848+#ifdef CONFIG_PREEMPT_RT_FULL
16849+static void rcu_preempt_qs(int cpu);
16850+
16851+void rcu_bh_qs(int cpu)
16852+{
16853+ rcu_preempt_qs(cpu);
16854+}
16855+#else
16856 void rcu_bh_qs(int cpu)
16857 {
16858 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
16859@@ -189,6 +197,7 @@ void rcu_bh_qs(int cpu)
16860 trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
16861 rdp->passed_quiesce = 1;
16862 }
16863+#endif
16864
16865 /*
16866 * Note a context switch. This is a quiescent state for RCU-sched,
16867@@ -242,6 +251,7 @@ long rcu_batches_completed_sched(void)
16868 }
16869 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
16870
16871+#ifndef CONFIG_PREEMPT_RT_FULL
16872 /*
16873 * Return the number of RCU BH batches processed thus far for debug & stats.
16874 */
16875@@ -259,6 +269,7 @@ void rcu_bh_force_quiescent_state(void)
16876 force_quiescent_state(&rcu_bh_state);
16877 }
16878 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
16879+#endif
16880
16881 /*
16882 * Record the number of times rcutorture tests have been initiated and
16883@@ -1308,7 +1319,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
16884
16885 /* Handle grace-period start. */
16886 for (;;) {
16887- wait_event_interruptible(rsp->gp_wq,
16888+ swait_event_interruptible(rsp->gp_wq,
16889 rsp->gp_flags &
16890 RCU_GP_FLAG_INIT);
16891 if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
16892@@ -1327,7 +1338,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
16893 }
16894 for (;;) {
16895 rsp->jiffies_force_qs = jiffies + j;
16896- ret = wait_event_interruptible_timeout(rsp->gp_wq,
16897+ ret = swait_event_interruptible_timeout(rsp->gp_wq,
16898 (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
16899 (!ACCESS_ONCE(rnp->qsmask) &&
16900 !rcu_preempt_blocked_readers_cgp(rnp)),
16901@@ -1412,7 +1423,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
16902 local_irq_restore(flags);
16903
16904 /* Wake up rcu_gp_kthread() to start the grace period. */
16905- wake_up(&rsp->gp_wq);
16906+ swait_wake(&rsp->gp_wq);
16907 }
16908
16909 /*
16910@@ -1427,7 +1438,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
16911 {
16912 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
16913 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
16914- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
16915+ swait_wake(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
16916 }
16917
16918 /*
16919@@ -1992,7 +2003,8 @@ static void force_quiescent_state(struct rcu_state *rsp)
16920 }
16921 rsp->gp_flags |= RCU_GP_FLAG_FQS;
16922 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
16923- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
16924+ /* Memory barrier implied by wake_up() path. */
16925+ swait_wake(&rsp->gp_wq);
16926 }
16927
16928 /*
16929@@ -2183,6 +2195,7 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
16930 }
16931 EXPORT_SYMBOL_GPL(call_rcu_sched);
16932
16933+#ifndef CONFIG_PREEMPT_RT_FULL
16934 /*
16935 * Queue an RCU callback for invocation after a quicker grace period.
16936 */
16937@@ -2191,6 +2204,7 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
16938 __call_rcu(head, func, &rcu_bh_state, -1, 0);
16939 }
16940 EXPORT_SYMBOL_GPL(call_rcu_bh);
16941+#endif
16942
16943 /*
16944 * Because a context switch is a grace period for RCU-sched and RCU-bh,
16945@@ -2268,6 +2282,7 @@ void synchronize_sched(void)
16946 }
16947 EXPORT_SYMBOL_GPL(synchronize_sched);
16948
16949+#ifndef CONFIG_PREEMPT_RT_FULL
16950 /**
16951 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
16952 *
16953@@ -2294,6 +2309,7 @@ void synchronize_rcu_bh(void)
16954 wait_rcu_gp(call_rcu_bh);
16955 }
16956 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
16957+#endif
16958
16959 static int synchronize_sched_expedited_cpu_stop(void *data)
16960 {
16961@@ -2682,6 +2698,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
16962 mutex_unlock(&rsp->barrier_mutex);
16963 }
16964
16965+#ifndef CONFIG_PREEMPT_RT_FULL
16966 /**
16967 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
16968 */
16969@@ -2690,6 +2707,7 @@ void rcu_barrier_bh(void)
16970 _rcu_barrier(&rcu_bh_state);
16971 }
16972 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
16973+#endif
16974
16975 /**
16976 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
16977@@ -2982,7 +3000,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
16978 }
16979
16980 rsp->rda = rda;
16981- init_waitqueue_head(&rsp->gp_wq);
16982+ init_swait_head(&rsp->gp_wq);
16983 rnp = rsp->level[rcu_num_lvls - 1];
16984 for_each_possible_cpu(i) {
16985 while (i > rnp->grphi)
16986diff --git a/kernel/rcutree.h b/kernel/rcutree.h
16987index 4b69291..5cfdff9 100644
16988--- a/kernel/rcutree.h
16989+++ b/kernel/rcutree.h
16990@@ -397,7 +397,7 @@ struct rcu_state {
16991 unsigned long gpnum; /* Current gp number. */
16992 unsigned long completed; /* # of last completed gp. */
16993 struct task_struct *gp_kthread; /* Task for grace periods. */
16994- wait_queue_head_t gp_wq; /* Where GP task waits. */
16995+ struct swait_head gp_wq; /* Where GP task waits. */
16996 int gp_flags; /* Commands for GP task. */
16997
16998 /* End of fields guarded by root rcu_node's lock. */
16999diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
17000index c1cc7e1..778f138 100644
17001--- a/kernel/rcutree_plugin.h
17002+++ b/kernel/rcutree_plugin.h
17003@@ -351,7 +351,7 @@ void rcu_read_unlock_special(struct task_struct *t)
17004 }
17005
17006 /* Hardware IRQ handlers cannot block. */
17007- if (in_irq() || in_serving_softirq()) {
17008+ if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
17009 local_irq_restore(flags);
17010 return;
17011 }
17012@@ -1519,7 +1519,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
17013
17014 #endif /* #else #ifdef CONFIG_RCU_BOOST */
17015
17016-#if !defined(CONFIG_RCU_FAST_NO_HZ)
17017+#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
17018
17019 /*
17020 * Check to see if any future RCU-related work will need to be done
17021@@ -1535,6 +1535,9 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
17022 *delta_jiffies = ULONG_MAX;
17023 return rcu_cpu_has_callbacks(cpu);
17024 }
17025+#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
17026+
17027+#if !defined(CONFIG_RCU_FAST_NO_HZ)
17028
17029 /*
17030 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
17031@@ -1651,6 +1654,7 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
17032 rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
17033 }
17034
17035+#ifndef CONFIG_PREEMPT_RT_FULL
17036 /*
17037 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
17038 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
17039@@ -1694,6 +1698,7 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
17040 }
17041 return 0;
17042 }
17043+#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
17044
17045 /*
17046 * Handler for smp_call_function_single(). The only point of this
17047diff --git a/kernel/relay.c b/kernel/relay.c
17048index e8cd202..56ba44f 100644
17049--- a/kernel/relay.c
17050+++ b/kernel/relay.c
17051@@ -340,6 +340,10 @@ static void wakeup_readers(unsigned long data)
17052 {
17053 struct rchan_buf *buf = (struct rchan_buf *)data;
17054 wake_up_interruptible(&buf->read_wait);
17055+ /*
17056+ * Stupid polling for now:
17057+ */
17058+ mod_timer(&buf->timer, jiffies + 1);
17059 }
17060
17061 /**
17062@@ -357,6 +361,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init)
17063 init_waitqueue_head(&buf->read_wait);
17064 kref_init(&buf->kref);
17065 setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
17066+ mod_timer(&buf->timer, jiffies + 1);
17067 } else
17068 del_timer_sync(&buf->timer);
17069
17070@@ -739,15 +744,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
17071 else
17072 buf->early_bytes += buf->chan->subbuf_size -
17073 buf->padding[old_subbuf];
17074- smp_mb();
17075- if (waitqueue_active(&buf->read_wait))
17076- /*
17077- * Calling wake_up_interruptible() from here
17078- * will deadlock if we happen to be logging
17079- * from the scheduler (trying to re-grab
17080- * rq->lock), so defer it.
17081- */
17082- mod_timer(&buf->timer, jiffies + 1);
17083 }
17084
17085 old = buf->data;
17086diff --git a/kernel/res_counter.c b/kernel/res_counter.c
17087index ff55247..cfecbee 100644
17088--- a/kernel/res_counter.c
17089+++ b/kernel/res_counter.c
17090@@ -49,7 +49,7 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,
17091
17092 r = ret = 0;
17093 *limit_fail_at = NULL;
17094- local_irq_save(flags);
17095+ local_irq_save_nort(flags);
17096 for (c = counter; c != NULL; c = c->parent) {
17097 spin_lock(&c->lock);
17098 r = res_counter_charge_locked(c, val, force);
17099@@ -69,7 +69,7 @@ static int __res_counter_charge(struct res_counter *counter, unsigned long val,
17100 spin_unlock(&u->lock);
17101 }
17102 }
17103- local_irq_restore(flags);
17104+ local_irq_restore_nort(flags);
17105
17106 return ret;
17107 }
17108@@ -103,7 +103,7 @@ u64 res_counter_uncharge_until(struct res_counter *counter,
17109 struct res_counter *c;
17110 u64 ret = 0;
17111
17112- local_irq_save(flags);
17113+ local_irq_save_nort(flags);
17114 for (c = counter; c != top; c = c->parent) {
17115 u64 r;
17116 spin_lock(&c->lock);
17117@@ -112,7 +112,7 @@ u64 res_counter_uncharge_until(struct res_counter *counter,
17118 ret = r;
17119 spin_unlock(&c->lock);
17120 }
17121- local_irq_restore(flags);
17122+ local_irq_restore_nort(flags);
17123 return ret;
17124 }
17125
17126diff --git a/kernel/rt.c b/kernel/rt.c
17127new file mode 100644
17128index 0000000..433ae42
17129--- /dev/null
17130+++ b/kernel/rt.c
17131@@ -0,0 +1,453 @@
17132+/*
17133+ * kernel/rt.c
17134+ *
17135+ * Real-Time Preemption Support
17136+ *
17137+ * started by Ingo Molnar:
17138+ *
17139+ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
17140+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17141+ *
17142+ * historic credit for proving that Linux spinlocks can be implemented via
17143+ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
17144+ * and others) who prototyped it on 2.4 and did lots of comparative
17145+ * research and analysis; TimeSys, for proving that you can implement a
17146+ * fully preemptible kernel via the use of IRQ threading and mutexes;
17147+ * Bill Huey for persuasively arguing on lkml that the mutex model is the
17148+ * right one; and to MontaVista, who ported pmutexes to 2.6.
17149+ *
17150+ * This code is a from-scratch implementation and is not based on pmutexes,
17151+ * but the idea of converting spinlocks to mutexes is used here too.
17152+ *
17153+ * lock debugging, locking tree, deadlock detection:
17154+ *
17155+ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
17156+ * Released under the General Public License (GPL).
17157+ *
17158+ * Includes portions of the generic R/W semaphore implementation from:
17159+ *
17160+ * Copyright (c) 2001 David Howells (dhowells@redhat.com).
17161+ * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
17162+ * - Derived also from comments by Linus
17163+ *
17164+ * Pending ownership of locks and ownership stealing:
17165+ *
17166+ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
17167+ *
17168+ * (also by Steven Rostedt)
17169+ * - Converted single pi_lock to individual task locks.
17170+ *
17171+ * By Esben Nielsen:
17172+ * Doing priority inheritance with help of the scheduler.
17173+ *
17174+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17175+ * - major rework based on Esben Nielsen's initial patch
17176+ * - replaced thread_info references by task_struct refs
17177+ * - removed task->pending_owner dependency
17178+ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
17179+ * in the scheduler return path as discussed with Steven Rostedt
17180+ *
17181+ * Copyright (C) 2006, Kihon Technologies Inc.
17182+ * Steven Rostedt <rostedt@goodmis.org>
17183+ * - debugged and patched Thomas Gleixner's rework.
17184+ * - added back the cmpxchg to the rework.
17185+ * - turned atomic require back on for SMP.
17186+ */
17187+
17188+#include <linux/spinlock.h>
17189+#include <linux/rtmutex.h>
17190+#include <linux/sched.h>
17191+#include <linux/delay.h>
17192+#include <linux/module.h>
17193+#include <linux/kallsyms.h>
17194+#include <linux/syscalls.h>
17195+#include <linux/interrupt.h>
17196+#include <linux/plist.h>
17197+#include <linux/fs.h>
17198+#include <linux/futex.h>
17199+#include <linux/hrtimer.h>
17200+
17201+#include "rtmutex_common.h"
17202+
17203+/*
17204+ * struct mutex functions
17205+ */
17206+void __mutex_do_init(struct mutex *mutex, const char *name,
17207+ struct lock_class_key *key)
17208+{
17209+#ifdef CONFIG_DEBUG_LOCK_ALLOC
17210+ /*
17211+ * Make sure we are not reinitializing a held lock:
17212+ */
17213+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
17214+ lockdep_init_map(&mutex->dep_map, name, key, 0);
17215+#endif
17216+ mutex->lock.save_state = 0;
17217+}
17218+EXPORT_SYMBOL(__mutex_do_init);
17219+
17220+void __lockfunc _mutex_lock(struct mutex *lock)
17221+{
17222+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17223+ rt_mutex_lock(&lock->lock);
17224+}
17225+EXPORT_SYMBOL(_mutex_lock);
17226+
17227+int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
17228+{
17229+ int ret;
17230+
17231+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17232+ ret = rt_mutex_lock_interruptible(&lock->lock, 0);
17233+ if (ret)
17234+ mutex_release(&lock->dep_map, 1, _RET_IP_);
17235+ return ret;
17236+}
17237+EXPORT_SYMBOL(_mutex_lock_interruptible);
17238+
17239+int __lockfunc _mutex_lock_killable(struct mutex *lock)
17240+{
17241+ int ret;
17242+
17243+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17244+ ret = rt_mutex_lock_killable(&lock->lock, 0);
17245+ if (ret)
17246+ mutex_release(&lock->dep_map, 1, _RET_IP_);
17247+ return ret;
17248+}
17249+EXPORT_SYMBOL(_mutex_lock_killable);
17250+
17251+#ifdef CONFIG_DEBUG_LOCK_ALLOC
17252+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
17253+{
17254+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17255+ rt_mutex_lock(&lock->lock);
17256+}
17257+EXPORT_SYMBOL(_mutex_lock_nested);
17258+
17259+void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
17260+{
17261+ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
17262+ rt_mutex_lock(&lock->lock);
17263+}
17264+EXPORT_SYMBOL(_mutex_lock_nest_lock);
17265+
17266+int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
17267+{
17268+ int ret;
17269+
17270+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17271+ ret = rt_mutex_lock_interruptible(&lock->lock, 0);
17272+ if (ret)
17273+ mutex_release(&lock->dep_map, 1, _RET_IP_);
17274+ return ret;
17275+}
17276+EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
17277+
17278+int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
17279+{
17280+ int ret;
17281+
17282+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
17283+ ret = rt_mutex_lock_killable(&lock->lock, 0);
17284+ if (ret)
17285+ mutex_release(&lock->dep_map, 1, _RET_IP_);
17286+ return ret;
17287+}
17288+EXPORT_SYMBOL(_mutex_lock_killable_nested);
17289+#endif
17290+
17291+int __lockfunc _mutex_trylock(struct mutex *lock)
17292+{
17293+ int ret = rt_mutex_trylock(&lock->lock);
17294+
17295+ if (ret)
17296+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
17297+
17298+ return ret;
17299+}
17300+EXPORT_SYMBOL(_mutex_trylock);
17301+
17302+void __lockfunc _mutex_unlock(struct mutex *lock)
17303+{
17304+ mutex_release(&lock->dep_map, 1, _RET_IP_);
17305+ rt_mutex_unlock(&lock->lock);
17306+}
17307+EXPORT_SYMBOL(_mutex_unlock);
17308+
17309+/*
17310+ * rwlock_t functions
17311+ */
17312+int __lockfunc rt_write_trylock(rwlock_t *rwlock)
17313+{
17314+ int ret = rt_mutex_trylock(&rwlock->lock);
17315+
17316+ migrate_disable();
17317+ if (ret)
17318+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
17319+ else
17320+ migrate_enable();
17321+
17322+ return ret;
17323+}
17324+EXPORT_SYMBOL(rt_write_trylock);
17325+
17326+int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
17327+{
17328+ int ret;
17329+
17330+ *flags = 0;
17331+ migrate_disable();
17332+ ret = rt_write_trylock(rwlock);
17333+ if (!ret)
17334+ migrate_enable();
17335+ return ret;
17336+}
17337+EXPORT_SYMBOL(rt_write_trylock_irqsave);
17338+
17339+int __lockfunc rt_read_trylock(rwlock_t *rwlock)
17340+{
17341+ struct rt_mutex *lock = &rwlock->lock;
17342+ int ret = 1;
17343+
17344+ /*
17345+ * recursive read locks succeed when current owns the lock,
17346+ * but not when read_depth == 0 which means that the lock is
17347+ * write locked.
17348+ */
17349+ migrate_disable();
17350+ if (rt_mutex_owner(lock) != current) {
17351+ ret = rt_mutex_trylock(lock);
17352+ if (ret)
17353+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
17354+ } else if (!rwlock->read_depth) {
17355+ ret = 0;
17356+ }
17357+
17358+ if (ret)
17359+ rwlock->read_depth++;
17360+ else
17361+ migrate_enable();
17362+
17363+ return ret;
17364+}
17365+EXPORT_SYMBOL(rt_read_trylock);
17366+
17367+void __lockfunc rt_write_lock(rwlock_t *rwlock)
17368+{
17369+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
17370+ __rt_spin_lock(&rwlock->lock);
17371+}
17372+EXPORT_SYMBOL(rt_write_lock);
17373+
17374+void __lockfunc rt_read_lock(rwlock_t *rwlock)
17375+{
17376+ struct rt_mutex *lock = &rwlock->lock;
17377+
17378+ /*
17379+ * recursive read locks succeed when current owns the lock
17380+ */
17381+ if (rt_mutex_owner(lock) != current) {
17382+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
17383+ __rt_spin_lock(lock);
17384+ }
17385+ rwlock->read_depth++;
17386+}
17387+
17388+EXPORT_SYMBOL(rt_read_lock);
17389+
17390+void __lockfunc rt_write_unlock(rwlock_t *rwlock)
17391+{
17392+ /* NOTE: we always pass in '1' for nested, for simplicity */
17393+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
17394+ __rt_spin_unlock(&rwlock->lock);
17395+}
17396+EXPORT_SYMBOL(rt_write_unlock);
17397+
17398+void __lockfunc rt_read_unlock(rwlock_t *rwlock)
17399+{
17400+ /* Release the lock only when read_depth is down to 0 */
17401+ if (--rwlock->read_depth == 0) {
17402+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
17403+ __rt_spin_unlock(&rwlock->lock);
17404+ }
17405+}
17406+EXPORT_SYMBOL(rt_read_unlock);
17407+
17408+unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
17409+{
17410+ rt_write_lock(rwlock);
17411+
17412+ return 0;
17413+}
17414+EXPORT_SYMBOL(rt_write_lock_irqsave);
17415+
17416+unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
17417+{
17418+ rt_read_lock(rwlock);
17419+
17420+ return 0;
17421+}
17422+EXPORT_SYMBOL(rt_read_lock_irqsave);
17423+
17424+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
17425+{
17426+#ifdef CONFIG_DEBUG_LOCK_ALLOC
17427+ /*
17428+ * Make sure we are not reinitializing a held lock:
17429+ */
17430+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
17431+ lockdep_init_map(&rwlock->dep_map, name, key, 0);
17432+#endif
17433+ rwlock->lock.save_state = 1;
17434+ rwlock->read_depth = 0;
17435+}
17436+EXPORT_SYMBOL(__rt_rwlock_init);
17437+
17438+/*
17439+ * rw_semaphores
17440+ */
17441+
17442+void rt_up_write(struct rw_semaphore *rwsem)
17443+{
17444+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
17445+ rt_mutex_unlock(&rwsem->lock);
17446+}
17447+EXPORT_SYMBOL(rt_up_write);
17448+
17449+void rt_up_read(struct rw_semaphore *rwsem)
17450+{
17451+ if (--rwsem->read_depth == 0) {
17452+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
17453+ rt_mutex_unlock(&rwsem->lock);
17454+ }
17455+}
17456+EXPORT_SYMBOL(rt_up_read);
17457+
17458+/*
17459+ * downgrade a write lock into a read lock
17460+ * - just wake up any readers at the front of the queue
17461+ */
17462+void rt_downgrade_write(struct rw_semaphore *rwsem)
17463+{
17464+ BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
17465+ rwsem->read_depth = 1;
17466+}
17467+EXPORT_SYMBOL(rt_downgrade_write);
17468+
17469+int rt_down_write_trylock(struct rw_semaphore *rwsem)
17470+{
17471+ int ret = rt_mutex_trylock(&rwsem->lock);
17472+
17473+ if (ret)
17474+ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
17475+ return ret;
17476+}
17477+EXPORT_SYMBOL(rt_down_write_trylock);
17478+
17479+void rt_down_write(struct rw_semaphore *rwsem)
17480+{
17481+ rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
17482+ rt_mutex_lock(&rwsem->lock);
17483+}
17484+EXPORT_SYMBOL(rt_down_write);
17485+
17486+void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
17487+{
17488+ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
17489+ rt_mutex_lock(&rwsem->lock);
17490+}
17491+EXPORT_SYMBOL(rt_down_write_nested);
17492+
17493+void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
17494+ struct lockdep_map *nest)
17495+{
17496+ rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
17497+ rt_mutex_lock(&rwsem->lock);
17498+}
17499+
17500+int rt_down_read_trylock(struct rw_semaphore *rwsem)
17501+{
17502+ struct rt_mutex *lock = &rwsem->lock;
17503+ int ret = 1;
17504+
17505+ /*
17506+ * recursive read locks succeed when current owns the rwsem,
17507+ * but not when read_depth == 0 which means that the rwsem is
17508+ * write locked.
17509+ */
17510+ if (rt_mutex_owner(lock) != current) {
17511+ ret = rt_mutex_trylock(&rwsem->lock);
17512+ if (ret)
17513+ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
17514+ } else if (!rwsem->read_depth) {
17515+ ret = 0;
17516+ }
17517+
17518+ if (ret)
17519+ rwsem->read_depth++;
17520+ return ret;
17521+}
17522+EXPORT_SYMBOL(rt_down_read_trylock);
17523+
17524+static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
17525+{
17526+ struct rt_mutex *lock = &rwsem->lock;
17527+
17528+ if (rt_mutex_owner(lock) != current) {
17529+ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
17530+ rt_mutex_lock(&rwsem->lock);
17531+ }
17532+ rwsem->read_depth++;
17533+}
17534+
17535+void rt_down_read(struct rw_semaphore *rwsem)
17536+{
17537+ __rt_down_read(rwsem, 0);
17538+}
17539+EXPORT_SYMBOL(rt_down_read);
17540+
17541+void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
17542+{
17543+ __rt_down_read(rwsem, subclass);
17544+}
17545+EXPORT_SYMBOL(rt_down_read_nested);
17546+
17547+void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
17548+ struct lock_class_key *key)
17549+{
17550+#ifdef CONFIG_DEBUG_LOCK_ALLOC
17551+ /*
17552+ * Make sure we are not reinitializing a held lock:
17553+ */
17554+ debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
17555+ lockdep_init_map(&rwsem->dep_map, name, key, 0);
17556+#endif
17557+ rwsem->read_depth = 0;
17558+ rwsem->lock.save_state = 0;
17559+}
17560+EXPORT_SYMBOL(__rt_rwsem_init);
17561+
17562+/**
17563+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
17564+ * @cnt: the atomic which we are to dec
17565+ * @lock: the mutex to return holding if we dec to 0
17566+ *
17567+ * return true and hold lock if we dec to 0, return false otherwise
17568+ */
17569+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
17570+{
17571+ /* dec if we can't possibly hit 0 */
17572+ if (atomic_add_unless(cnt, -1, 1))
17573+ return 0;
17574+ /* we might hit 0, so take the lock */
17575+ mutex_lock(lock);
17576+ if (!atomic_dec_and_test(cnt)) {
17577+ /* when we actually did the dec, we didn't hit 0 */
17578+ mutex_unlock(lock);
17579+ return 0;
17580+ }
17581+ /* we hit 0, and we hold the lock */
17582+ return 1;
17583+}
17584+EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
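/*
 * Illustrative sketch (not part of the patch): the classic caller
 * pattern for atomic_dec_and_mutex_lock() documented above - drop a
 * reference and, only when it was the last one, tear the object down
 * under the mutex.  'struct obj' and 'struct obj_cache' are
 * hypothetical.
 */
struct obj {
        atomic_t                refcount;
        struct list_head        node;
};

struct obj_cache {
        struct mutex            lock;
        struct list_head        objects;
};

static void obj_put(struct obj_cache *cache, struct obj *obj)
{
        /* Returns 1 with cache->lock held only when the count hit zero. */
        if (atomic_dec_and_mutex_lock(&obj->refcount, &cache->lock)) {
                list_del(&obj->node);
                mutex_unlock(&cache->lock);
                kfree(obj);
        }
}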
17585diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
17586index a242e69..20742e7 100644
17587--- a/kernel/rtmutex.c
17588+++ b/kernel/rtmutex.c
17589@@ -8,6 +8,12 @@
17590 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
17591 * Copyright (C) 2006 Esben Nielsen
17592 *
17593+ * Adaptive Spinlocks:
17594+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
17595+ * and Peter Morreale,
17596+ * Adaptive Spinlocks simplification:
17597+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
17598+ *
17599 * See Documentation/rt-mutex-design.txt for details.
17600 */
17601 #include <linux/spinlock.h>
17602@@ -67,6 +73,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
17603 clear_rt_mutex_waiters(lock);
17604 }
17605
17606+static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
17607+{
17608+ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
17609+ waiter != PI_REQUEUE_INPROGRESS;
17610+}
17611+
17612 /*
17613 * We can speed up the acquire/release, if the architecture
17614 * supports cmpxchg and if there's no debugging state to be set up
17615@@ -90,6 +102,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
17616 }
17617 #endif
17618
17619+static inline void init_lists(struct rt_mutex *lock)
17620+{
17621+ if (unlikely(!lock->wait_list.node_list.prev))
17622+ plist_head_init(&lock->wait_list);
17623+}
17624+
17625 /*
17626 * Calculate task priority from the waiter list priority
17627 *
17628@@ -106,6 +124,18 @@ int rt_mutex_getprio(struct task_struct *task)
17629 }
17630
17631 /*
17632+ * Called by sched_setscheduler() to check whether the priority change
17633+ * is overruled by a possible priority boosting.
17634+ */
17635+int rt_mutex_check_prio(struct task_struct *task, int newprio)
17636+{
17637+ if (!task_has_pi_waiters(task))
17638+ return 0;
17639+
17640+ return task_top_pi_waiter(task)->pi_list_entry.prio <= newprio;
17641+}
17642+
17643+/*
17644 * Adjust the priority of a task, after its pi_waiters got modified.
17645 *
17646 * This can be both boosting and unboosting. task->pi_lock must be held.
17647@@ -136,6 +166,14 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
17648 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17649 }
17650
17651+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
17652+{
17653+ if (waiter->savestate)
17654+ wake_up_lock_sleeper(waiter->task);
17655+ else
17656+ wake_up_process(waiter->task);
17657+}
17658+
17659 /*
17660 * Max number of times we'll walk the boosting chain:
17661 */
17662@@ -196,7 +234,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
17663 * reached or the state of the chain has changed while we
17664 * dropped the locks.
17665 */
17666- if (!waiter)
17667+ if (!rt_mutex_real_waiter(waiter))
17668 goto out_unlock_pi;
17669
17670 /*
17671@@ -247,13 +285,15 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
17672 /* Release the task */
17673 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17674 if (!rt_mutex_owner(lock)) {
17675+ struct rt_mutex_waiter *lock_top_waiter;
17676+
17677 /*
17678 * If the requeue above changed the top waiter, then we need
17679 * to wake the new top waiter up to try to get the lock.
17680 */
17681-
17682- if (top_waiter != rt_mutex_top_waiter(lock))
17683- wake_up_process(rt_mutex_top_waiter(lock)->task);
17684+ lock_top_waiter = rt_mutex_top_waiter(lock);
17685+ if (top_waiter != lock_top_waiter)
17686+ rt_mutex_wake_waiter(lock_top_waiter);
17687 raw_spin_unlock(&lock->wait_lock);
17688 goto out_put_task;
17689 }
17690@@ -298,6 +338,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
17691 return ret;
17692 }
17693
17694+
17695+#define STEAL_NORMAL 0
17696+#define STEAL_LATERAL 1
17697+
17698+/*
17699+ * Note that RT tasks are excluded from lateral-steals to prevent the
17700+ * introduction of an unbounded latency
17701+ */
17702+static inline int lock_is_stealable(struct task_struct *task,
17703+ struct task_struct *pendowner, int mode)
17704+{
17705+ if (mode == STEAL_NORMAL || rt_task(task)) {
17706+ if (task->prio >= pendowner->prio)
17707+ return 0;
17708+ } else if (task->prio > pendowner->prio)
17709+ return 0;
17710+ return 1;
17711+}
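/*
 * Worked example for lock_is_stealable() above: with STEAL_LATERAL a
 * SCHED_OTHER waiter whose ->prio equals the pending owner's may still
 * take the lock (a "lateral" steal), while an RT waiter always falls
 * back to the STEAL_NORMAL rule and needs a strictly higher priority
 * (numerically lower ->prio), which keeps RT latencies bounded.
 */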
17712+
17713 /*
17714 * Try to take an rt-mutex
17715 *
17716@@ -307,8 +366,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
17717 * @task: the task which wants to acquire the lock
17718 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
17719 */
17720-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17721- struct rt_mutex_waiter *waiter)
17722+static int
17723+__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17724+ struct rt_mutex_waiter *waiter, int mode)
17725 {
17726 /*
17727 * We have to be careful here if the atomic speedups are
17728@@ -341,12 +401,14 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17729 * 3) it is top waiter
17730 */
17731 if (rt_mutex_has_waiters(lock)) {
17732- if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
17733- if (!waiter || waiter != rt_mutex_top_waiter(lock))
17734- return 0;
17735- }
17736+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
17737+
17738+ if (task != pown && !lock_is_stealable(task, pown, mode))
17739+ return 0;
17740 }
17741
17742+ /* We got the lock. */
17743+
17744 if (waiter || rt_mutex_has_waiters(lock)) {
17745 unsigned long flags;
17746 struct rt_mutex_waiter *top;
17747@@ -371,7 +433,6 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17748 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17749 }
17750
17751- /* We got the lock. */
17752 debug_rt_mutex_lock(lock);
17753
17754 rt_mutex_set_owner(lock, task);
17755@@ -381,6 +442,13 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17756 return 1;
17757 }
17758
17759+static inline int
17760+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
17761+ struct rt_mutex_waiter *waiter)
17762+{
17763+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
17764+}
17765+
17766 /*
17767 * Task blocks on lock.
17768 *
17769@@ -399,6 +467,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
17770 int chain_walk = 0, res;
17771
17772 raw_spin_lock_irqsave(&task->pi_lock, flags);
17773+
17774+ /*
17775+ * In the case of futex requeue PI, this will be a proxy
17776+ * lock. The task will wake unaware that it is enqueued on
17777+ * this lock. Avoid blocking on two locks and corrupting
17778+ * pi_blocked_on via the PI_WAKEUP_INPROGRESS
17779+ * flag. futex_wait_requeue_pi() sets this when it wakes up
17780+ * before requeue (due to a signal or timeout). Do not enqueue
17781+ * the task if PI_WAKEUP_INPROGRESS is set.
17782+ */
17783+ if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
17784+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17785+ return -EAGAIN;
17786+ }
17787+
17788+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
17789+
17790 __rt_mutex_adjust_prio(task);
17791 waiter->task = task;
17792 waiter->lock = lock;
17793@@ -423,7 +508,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
17794 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
17795
17796 __rt_mutex_adjust_prio(owner);
17797- if (owner->pi_blocked_on)
17798+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
17799 chain_walk = 1;
17800 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
17801 }
17802@@ -478,7 +563,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
17803
17804 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
17805
17806- wake_up_process(waiter->task);
17807+ rt_mutex_wake_waiter(waiter);
17808 }
17809
17810 /*
17811@@ -517,7 +602,7 @@ static void remove_waiter(struct rt_mutex *lock,
17812 }
17813 __rt_mutex_adjust_prio(owner);
17814
17815- if (owner->pi_blocked_on)
17816+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
17817 chain_walk = 1;
17818
17819 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
17820@@ -551,23 +636,316 @@ void rt_mutex_adjust_pi(struct task_struct *task)
17821 raw_spin_lock_irqsave(&task->pi_lock, flags);
17822
17823 waiter = task->pi_blocked_on;
17824- if (!waiter || waiter->list_entry.prio == task->prio) {
17825+ if (!rt_mutex_real_waiter(waiter) ||
17826+ waiter->list_entry.prio == task->prio) {
17827 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17828 return;
17829 }
17830
17831- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17832-
17833 /* gets dropped in rt_mutex_adjust_prio_chain()! */
17834 get_task_struct(task);
17835+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
17836 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
17837 }
17838
17839+#ifdef CONFIG_PREEMPT_RT_FULL
17840+/*
17841+ * preemptible spin_lock functions:
17842+ */
17843+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
17844+ void (*slowfn)(struct rt_mutex *lock))
17845+{
17846+ might_sleep();
17847+
17848+ if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
17849+ rt_mutex_deadlock_account_lock(lock, current);
17850+ else
17851+ slowfn(lock);
17852+}
17853+
17854+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
17855+ void (*slowfn)(struct rt_mutex *lock))
17856+{
17857+ if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
17858+ rt_mutex_deadlock_account_unlock(current);
17859+ else
17860+ slowfn(lock);
17861+}
17862+
17863+#ifdef CONFIG_SMP
17864+/*
17865+ * Note that owner is a speculative pointer and dereferencing relies
17866+ * on rcu_read_lock() and the check against the lock owner.
17867+ */
17868+static int adaptive_wait(struct rt_mutex *lock,
17869+ struct task_struct *owner)
17870+{
17871+ int res = 0;
17872+
17873+ rcu_read_lock();
17874+ for (;;) {
17875+ if (owner != rt_mutex_owner(lock))
17876+ break;
17877+ /*
17878+ * Ensure that owner->on_cpu is dereferenced _after_
17879+ * checking the above to be valid.
17880+ */
17881+ barrier();
17882+ if (!owner->on_cpu) {
17883+ res = 1;
17884+ break;
17885+ }
17886+ cpu_relax();
17887+ }
17888+ rcu_read_unlock();
17889+ return res;
17890+}
17891+#else
17892+static int adaptive_wait(struct rt_mutex *lock,
17893+ struct task_struct *orig_owner)
17894+{
17895+ return 1;
17896+}
17897+#endif
17898+
17899+# define pi_lock(lock) raw_spin_lock_irq(lock)
17900+# define pi_unlock(lock) raw_spin_unlock_irq(lock)
17901+
17902+/*
17903+ * Slow path lock function spin_lock style: this variant is very
17904+ * careful not to miss any non-lock wakeups.
17905+ *
17906+ * We store the current state under p->pi_lock in p->saved_state and
17907+ * the try_to_wake_up() code handles this accordingly.
17908+ */
17909+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
17910+{
17911+ struct task_struct *lock_owner, *self = current;
17912+ struct rt_mutex_waiter waiter, *top_waiter;
17913+ int ret;
17914+
17915+ rt_mutex_init_waiter(&waiter, true);
17916+
17917+ raw_spin_lock(&lock->wait_lock);
17918+ init_lists(lock);
17919+
17920+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
17921+ raw_spin_unlock(&lock->wait_lock);
17922+ return;
17923+ }
17924+
17925+ BUG_ON(rt_mutex_owner(lock) == self);
17926+
17927+ /*
17928+ * We save whatever state the task is in and we'll restore it
17929+ * after acquiring the lock taking real wakeups into account
17930+ * as well. We are serialized via pi_lock against wakeups. See
17931+ * try_to_wake_up().
17932+ */
17933+ pi_lock(&self->pi_lock);
17934+ self->saved_state = self->state;
17935+ __set_current_state(TASK_UNINTERRUPTIBLE);
17936+ pi_unlock(&self->pi_lock);
17937+
17938+ ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
17939+ BUG_ON(ret);
17940+
17941+ for (;;) {
17942+ /* Try to acquire the lock again. */
17943+ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
17944+ break;
17945+
17946+ top_waiter = rt_mutex_top_waiter(lock);
17947+ lock_owner = rt_mutex_owner(lock);
17948+
17949+ raw_spin_unlock(&lock->wait_lock);
17950+
17951+ debug_rt_mutex_print_deadlock(&waiter);
17952+
17953+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
17954+ schedule_rt_mutex(lock);
17955+
17956+ raw_spin_lock(&lock->wait_lock);
17957+
17958+ pi_lock(&self->pi_lock);
17959+ __set_current_state(TASK_UNINTERRUPTIBLE);
17960+ pi_unlock(&self->pi_lock);
17961+ }
17962+
17963+ /*
17964+ * Restore the task state to current->saved_state. We set it
17965+ * to the original state above and the try_to_wake_up() code
17966+ * has possibly updated it when a real (non-rtmutex) wakeup
17967+ * happened while we were blocked. Clear saved_state so
17968+ * try_to_wake_up() does not get confused.
17969+ */
17970+ pi_lock(&self->pi_lock);
17971+ __set_current_state(self->saved_state);
17972+ self->saved_state = TASK_RUNNING;
17973+ pi_unlock(&self->pi_lock);
17974+
17975+ /*
17976+ * try_to_take_rt_mutex() sets the waiter bit
17977+ * unconditionally. We might have to fix that up:
17978+ */
17979+ fixup_rt_mutex_waiters(lock);
17980+
17981+ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
17982+ BUG_ON(!plist_node_empty(&waiter.list_entry));
17983+
17984+ raw_spin_unlock(&lock->wait_lock);
17985+
17986+ debug_rt_mutex_free_waiter(&waiter);
17987+}
17988+
17989+/*
17990+ * Slow path to release a rt_mutex spin_lock style
17991+ */
17992+static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
17993+{
17994+ raw_spin_lock(&lock->wait_lock);
17995+
17996+ debug_rt_mutex_unlock(lock);
17997+
17998+ rt_mutex_deadlock_account_unlock(current);
17999+
18000+ if (!rt_mutex_has_waiters(lock)) {
18001+ lock->owner = NULL;
18002+ raw_spin_unlock(&lock->wait_lock);
18003+ return;
18004+ }
18005+
18006+ wakeup_next_waiter(lock);
18007+
18008+ raw_spin_unlock(&lock->wait_lock);
18009+
18010+ /* Undo pi boosting when necessary */
18011+ rt_mutex_adjust_prio(current);
18012+}
18013+
18014+void __lockfunc rt_spin_lock(spinlock_t *lock)
18015+{
18016+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
18017+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
18018+}
18019+EXPORT_SYMBOL(rt_spin_lock);
18020+
18021+void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
18022+{
18023+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
18024+}
18025+EXPORT_SYMBOL(__rt_spin_lock);
18026+
18027+#ifdef CONFIG_DEBUG_LOCK_ALLOC
18028+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
18029+{
18030+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
18031+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
18032+}
18033+EXPORT_SYMBOL(rt_spin_lock_nested);
18034+#endif
18035+
18036+void __lockfunc rt_spin_unlock(spinlock_t *lock)
18037+{
18038+ /* NOTE: we always pass in '1' for nested, for simplicity */
18039+ spin_release(&lock->dep_map, 1, _RET_IP_);
18040+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
18041+}
18042+EXPORT_SYMBOL(rt_spin_unlock);
18043+
18044+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
18045+{
18046+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
18047+}
18048+EXPORT_SYMBOL(__rt_spin_unlock);
18049+
18050+/*
18051+ * Wait for the lock to get unlocked: instead of polling for an unlock
18052+ * (like raw spinlocks do), we lock and unlock, to force the kernel to
18053+ * schedule if there's contention:
18054+ */
18055+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
18056+{
18057+ spin_lock(lock);
18058+ spin_unlock(lock);
18059+}
18060+EXPORT_SYMBOL(rt_spin_unlock_wait);
18061+
18062+int __lockfunc rt_spin_trylock(spinlock_t *lock)
18063+{
18064+ int ret = rt_mutex_trylock(&lock->lock);
18065+
18066+ if (ret)
18067+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18068+ return ret;
18069+}
18070+EXPORT_SYMBOL(rt_spin_trylock);
18071+
18072+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
18073+{
18074+ int ret;
18075+
18076+ local_bh_disable();
18077+ ret = rt_mutex_trylock(&lock->lock);
18078+ if (ret) {
18079+ migrate_disable();
18080+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18081+ } else
18082+ local_bh_enable();
18083+ return ret;
18084+}
18085+EXPORT_SYMBOL(rt_spin_trylock_bh);
18086+
18087+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
18088+{
18089+ int ret;
18090+
18091+ *flags = 0;
18092+ migrate_disable();
18093+ ret = rt_mutex_trylock(&lock->lock);
18094+ if (ret)
18095+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18096+ else
18097+ migrate_enable();
18098+ return ret;
18099+}
18100+EXPORT_SYMBOL(rt_spin_trylock_irqsave);
18101+
18102+int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
18103+{
18104+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
18105+ if (atomic_add_unless(atomic, -1, 1))
18106+ return 0;
18107+ migrate_disable();
18108+ rt_spin_lock(lock);
18109+ if (atomic_dec_and_test(atomic))
18110+ return 1;
18111+ rt_spin_unlock(lock);
18112+ migrate_enable();
18113+ return 0;
18114+}
18115+EXPORT_SYMBOL(atomic_dec_and_spin_lock);
18116+
18117+void
18118+__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
18119+{
18120+#ifdef CONFIG_DEBUG_LOCK_ALLOC
18121+ /*
18122+ * Make sure we are not reinitializing a held lock:
18123+ */
18124+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
18125+ lockdep_init_map(&lock->dep_map, name, key, 0);
18126+#endif
18127+}
18128+EXPORT_SYMBOL(__rt_spin_lock_init);
18129+
18130+#endif /* PREEMPT_RT_FULL */
18131+
18132 /**
18133 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
18134 * @lock: the rt_mutex to take
18135 * @state: the state the task should block in (TASK_INTERRUPTIBLE
18136- * or TASK_UNINTERRUPTIBLE)
18137+ * or TASK_UNINTERRUPTIBLE)
18138 * @timeout: the pre-initialized and started timer, or NULL for none
18139 * @waiter: the pre-initialized rt_mutex_waiter
18140 *
18141@@ -623,9 +1001,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
18142 struct rt_mutex_waiter waiter;
18143 int ret = 0;
18144
18145- debug_rt_mutex_init_waiter(&waiter);
18146+ rt_mutex_init_waiter(&waiter, false);
18147
18148 raw_spin_lock(&lock->wait_lock);
18149+ init_lists(lock);
18150
18151 /* Try to acquire the lock again: */
18152 if (try_to_take_rt_mutex(lock, current, NULL)) {
18153@@ -678,6 +1057,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
18154 int ret = 0;
18155
18156 raw_spin_lock(&lock->wait_lock);
18157+ init_lists(lock);
18158
18159 if (likely(rt_mutex_owner(lock) != current)) {
18160
18161@@ -791,12 +1171,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
18162 /**
18163 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
18164 *
18165- * @lock: the rt_mutex to be locked
18166+ * @lock: the rt_mutex to be locked
18167 * @detect_deadlock: deadlock detection on/off
18168 *
18169 * Returns:
18170- * 0 on success
18171- * -EINTR when interrupted by a signal
18172+ * 0 on success
18173+ * -EINTR when interrupted by a signal
18174 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
18175 */
18176 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
18177@@ -810,17 +1190,38 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
18178 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
18179
18180 /**
18181+ * rt_mutex_lock_killable - lock a rt_mutex killable
18182+ *
18183+ * @lock: the rt_mutex to be locked
18184+ * @detect_deadlock: deadlock detection on/off
18185+ *
18186+ * Returns:
18187+ * 0 on success
18188+ * -EINTR when interrupted by a signal
18189+ * -EDEADLK when the lock would deadlock (when deadlock detection is on)
18190+ */
18191+int __sched rt_mutex_lock_killable(struct rt_mutex *lock,
18192+ int detect_deadlock)
18193+{
18194+ might_sleep();
18195+
18196+ return rt_mutex_fastlock(lock, TASK_KILLABLE,
18197+ detect_deadlock, rt_mutex_slowlock);
18198+}
18199+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
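/*
 * Illustrative sketch (not part of the patch): a caller using
 * rt_mutex_lock_killable() so that a fatal signal can abort the wait.
 * 'my_lock' and the critical section are hypothetical.
 */
static int do_work_killable(struct rt_mutex *my_lock)
{
        int ret;

        ret = rt_mutex_lock_killable(my_lock, 0);
        if (ret)
                return ret;     /* -EINTR: killed while waiting */

        /* ... critical section ... */

        rt_mutex_unlock(my_lock);
        return 0;
}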
18200+
18201+/**
18202 * rt_mutex_timed_lock - lock a rt_mutex interruptible
18203 * the timeout structure is provided
18204 * by the caller
18205 *
18206- * @lock: the rt_mutex to be locked
18207+ * @lock: the rt_mutex to be locked
18208 * @timeout: timeout structure or NULL (no timeout)
18209 * @detect_deadlock: deadlock detection on/off
18210 *
18211 * Returns:
18212- * 0 on success
18213- * -EINTR when interrupted by a signal
18214+ * 0 on success
18215+ * -EINTR when interrupted by a signal
18216 * -ETIMEDOUT when the timeout expired
18217 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
18218 */
18219@@ -889,12 +1290,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
18220 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
18221 {
18222 lock->owner = NULL;
18223- raw_spin_lock_init(&lock->wait_lock);
18224 plist_head_init(&lock->wait_list);
18225
18226 debug_rt_mutex_init(lock, name);
18227 }
18228-EXPORT_SYMBOL_GPL(__rt_mutex_init);
18229+EXPORT_SYMBOL(__rt_mutex_init);
18230
18231 /**
18232 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
18233@@ -909,7 +1309,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
18234 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
18235 struct task_struct *proxy_owner)
18236 {
18237- __rt_mutex_init(lock, NULL);
18238+ rt_mutex_init(lock);
18239 debug_rt_mutex_proxy_lock(lock, proxy_owner);
18240 rt_mutex_set_owner(lock, proxy_owner);
18241 rt_mutex_deadlock_account_lock(lock, proxy_owner);
18242@@ -958,6 +1358,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
18243 return 1;
18244 }
18245
18246+#ifdef CONFIG_PREEMPT_RT_FULL
18247+ /*
18248+ * In PREEMPT_RT there's an added race.
18249+ * If the task that we are about to requeue times out,
18250+ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
18251+ * to skip this task. But right after the task sets
18252+ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
18253+ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
18254+ * This will replace the PI_WAKEUP_INPROGRESS with the actual
18255+ * lock that it blocks on. We *must not* place this task
18256+ * on this proxy lock in that case.
18257+ *
18258+ * To prevent this race, we first take the task's pi_lock
18259+ * and check if it has updated its pi_blocked_on. If it has,
18260+ * we assume that it woke up and we return -EAGAIN.
18261+ * Otherwise, we set the task's pi_blocked_on to
18262+ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
18263+ * it will know that we are in the process of requeuing it.
18264+ */
18265+ raw_spin_lock_irq(&task->pi_lock);
18266+ if (task->pi_blocked_on) {
18267+ raw_spin_unlock_irq(&task->pi_lock);
18268+ raw_spin_unlock(&lock->wait_lock);
18269+ return -EAGAIN;
18270+ }
18271+ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
18272+ raw_spin_unlock_irq(&task->pi_lock);
18273+#endif
18274+
18275 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
18276
18277 if (ret && !rt_mutex_owner(lock)) {
18278diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
18279index 53a66c8..6ec3dc1 100644
18280--- a/kernel/rtmutex_common.h
18281+++ b/kernel/rtmutex_common.h
18282@@ -49,6 +49,7 @@ struct rt_mutex_waiter {
18283 struct plist_node pi_list_entry;
18284 struct task_struct *task;
18285 struct rt_mutex *lock;
18286+ bool savestate;
18287 #ifdef CONFIG_DEBUG_RT_MUTEXES
18288 unsigned long ip;
18289 struct pid *deadlock_task_pid;
18290@@ -103,6 +104,9 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
18291 /*
18292 * PI-futex support (proxy locking functions, etc.):
18293 */
18294+#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
18295+#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
18296+
18297 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
18298 extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
18299 struct task_struct *proxy_owner);
18300@@ -123,4 +127,12 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
18301 # include "rtmutex.h"
18302 #endif
18303
18304+static inline void
18305+rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
18306+{
18307+ debug_rt_mutex_init_waiter(waiter);
18308+ waiter->task = NULL;
18309+ waiter->savestate = savestate;
18310+}
18311+
18312 #endif
18313diff --git a/kernel/sched/core.c b/kernel/sched/core.c
18314index 5e2f7c3..505e08f 100644
18315--- a/kernel/sched/core.c
18316+++ b/kernel/sched/core.c
18317@@ -272,7 +272,11 @@ late_initcall(sched_init_debug);
18318 * Number of tasks to iterate in a single balance run.
18319 * Limited because this is done with IRQs disabled.
18320 */
18321+#ifndef CONFIG_PREEMPT_RT_FULL
18322 const_debug unsigned int sysctl_sched_nr_migrate = 32;
18323+#else
18324+const_debug unsigned int sysctl_sched_nr_migrate = 8;
18325+#endif
18326
18327 /*
18328 * period over which we average the RT time consumption, measured
18329@@ -489,6 +493,7 @@ static void init_rq_hrtick(struct rq *rq)
18330
18331 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
18332 rq->hrtick_timer.function = hrtick;
18333+ rq->hrtick_timer.irqsafe = 1;
18334 }
18335 #else /* CONFIG_SCHED_HRTICK */
18336 static inline void hrtick_clear(struct rq *rq)
18337@@ -538,6 +543,37 @@ void resched_task(struct task_struct *p)
18338 smp_send_reschedule(cpu);
18339 }
18340
18341+#ifdef CONFIG_PREEMPT_LAZY
18342+void resched_task_lazy(struct task_struct *p)
18343+{
18344+ int cpu;
18345+
18346+ if (!sched_feat(PREEMPT_LAZY)) {
18347+ resched_task(p);
18348+ return;
18349+ }
18350+
18351+ assert_raw_spin_locked(&task_rq(p)->lock);
18352+
18353+ if (test_tsk_need_resched(p))
18354+ return;
18355+
18356+ if (test_tsk_need_resched_lazy(p))
18357+ return;
18358+
18359+ set_tsk_need_resched_lazy(p);
18360+
18361+ cpu = task_cpu(p);
18362+ if (cpu == smp_processor_id())
18363+ return;
18364+
18365+ /* NEED_RESCHED_LAZY must be visible before we test polling */
18366+ smp_mb();
18367+ if (!tsk_is_polling(p))
18368+ smp_send_reschedule(cpu);
18369+}
18370+#endif
18371+
18372 void resched_cpu(int cpu)
18373 {
18374 struct rq *rq = cpu_rq(cpu);
18375@@ -654,6 +690,17 @@ void resched_task(struct task_struct *p)
18376 assert_raw_spin_locked(&task_rq(p)->lock);
18377 set_tsk_need_resched(p);
18378 }
18379+#ifdef CONFIG_PREEMPT_LAZY
18380+void resched_task_lazy(struct task_struct *p)
18381+{
18382+ if (!sched_feat(PREEMPT_LAZY)) {
18383+ resched_task(p);
18384+ return;
18385+ }
18386+ assert_raw_spin_locked(&task_rq(p)->lock);
18387+ set_tsk_need_resched_lazy(p);
18388+}
18389+#endif
18390 #endif /* CONFIG_SMP */
18391
18392 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
18393@@ -1036,7 +1083,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
18394 * is actually now running somewhere else!
18395 */
18396 while (task_running(rq, p)) {
18397- if (match_state && unlikely(p->state != match_state))
18398+ if (match_state && unlikely(p->state != match_state)
18399+ && unlikely(p->saved_state != match_state))
18400 return 0;
18401 cpu_relax();
18402 }
18403@@ -1051,7 +1099,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
18404 running = task_running(rq, p);
18405 on_rq = p->on_rq;
18406 ncsw = 0;
18407- if (!match_state || p->state == match_state)
18408+ if (!match_state || p->state == match_state
18409+ || p->saved_state == match_state)
18410 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
18411 task_rq_unlock(rq, p, &flags);
18412
18413@@ -1187,6 +1236,12 @@ out:
18414 }
18415 }
18416
18417+ /*
18418+ * Clear PF_THREAD_BOUND, otherwise we wreck
18419+ * migrate_disable/enable. See optimization for
18420+ * PF_THREAD_BOUND tasks there.
18421+ */
18422+ p->flags &= ~PF_THREAD_BOUND;
18423 return dest_cpu;
18424 }
18425
18426@@ -1433,8 +1488,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
18427
18428 smp_wmb();
18429 raw_spin_lock_irqsave(&p->pi_lock, flags);
18430- if (!(p->state & state))
18431+ if (!(p->state & state)) {
18432+ /*
18433+ * The task might be running due to a spinlock sleeper
18434+ * wakeup. Check the saved state and set it to running
18435+ * if the wakeup condition is true.
18436+ */
18437+ if (!(wake_flags & WF_LOCK_SLEEPER)) {
18438+ if (p->saved_state & state) {
18439+ p->saved_state = TASK_RUNNING;
18440+ success = 1;
18441+ }
18442+ }
18443 goto out;
18444+ }
18445+
18446+ /*
18447+ * If this is a regular wakeup, then we can unconditionally
18448+ * clear the saved state of a "lock sleeper".
18449+ */
18450+ if (!(wake_flags & WF_LOCK_SLEEPER))
18451+ p->saved_state = TASK_RUNNING;
18452
18453 success = 1; /* we're going to change ->state */
18454 cpu = task_cpu(p);
18455@@ -1530,6 +1604,18 @@ int wake_up_process(struct task_struct *p)
18456 }
18457 EXPORT_SYMBOL(wake_up_process);
18458
18459+/**
18460+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
18461+ * @p: The process to be woken up.
18462+ *
18463+ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
18464+ * the nature of the wakeup.
18465+ */
18466+int wake_up_lock_sleeper(struct task_struct *p)
18467+{
18468+ return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
18469+}
18470+
18471 int wake_up_state(struct task_struct *p, unsigned int state)
18472 {
18473 return try_to_wake_up(p, state, 0);
18474@@ -1676,6 +1762,9 @@ void sched_fork(struct task_struct *p)
18475 /* Want to start with kernel preemption disabled. */
18476 task_thread_info(p)->preempt_count = 1;
18477 #endif
18478+#ifdef CONFIG_HAVE_PREEMPT_LAZY
18479+ task_thread_info(p)->preempt_lazy_count = 0;
18480+#endif
18481 #ifdef CONFIG_SMP
18482 plist_node_init(&p->pushable_tasks, MAX_PRIO);
18483 #endif
18484@@ -1842,8 +1931,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
18485 finish_arch_post_lock_switch();
18486
18487 fire_sched_in_preempt_notifiers(current);
18488+ /*
18489+ * We use mmdrop_delayed() here so we don't have to do the
18490+ * full __mmdrop() when we are the last user.
18491+ */
18492 if (mm)
18493- mmdrop(mm);
18494+ mmdrop_delayed(mm);
18495 if (unlikely(prev_state == TASK_DEAD)) {
18496 /*
18497 * Remove function-return probe instances associated with this
18498@@ -2743,8 +2836,13 @@ void __kprobes add_preempt_count(int val)
18499 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
18500 PREEMPT_MASK - 10);
18501 #endif
18502- if (preempt_count() == val)
18503- trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
18504+ if (preempt_count() == val) {
18505+ unsigned long ip = get_parent_ip(CALLER_ADDR1);
18506+#ifdef CONFIG_DEBUG_PREEMPT
18507+ current->preempt_disable_ip = ip;
18508+#endif
18509+ trace_preempt_off(CALLER_ADDR0, ip);
18510+ }
18511 }
18512 EXPORT_SYMBOL(add_preempt_count);
18513
18514@@ -2787,6 +2885,13 @@ static noinline void __schedule_bug(struct task_struct *prev)
18515 print_modules();
18516 if (irqs_disabled())
18517 print_irqtrace_events(prev);
18518+#ifdef CONFIG_DEBUG_PREEMPT
18519+ if (in_atomic_preempt_off()) {
18520+ pr_err("Preemption disabled at:");
18521+ print_ip_sym(current->preempt_disable_ip);
18522+ pr_cont("\n");
18523+ }
18524+#endif
18525 dump_stack();
18526 add_taint(TAINT_WARN);
18527 }
18528@@ -2810,6 +2915,128 @@ static inline void schedule_debug(struct task_struct *prev)
18529 schedstat_inc(this_rq(), sched_count);
18530 }
18531
18532+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
18533+#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
18534+#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
18535+#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
18536+
18537+static inline void update_migrate_disable(struct task_struct *p)
18538+{
18539+ const struct cpumask *mask;
18540+
18541+ if (likely(!p->migrate_disable))
18542+ return;
18543+
18544+ /* Did we already update affinity? */
18545+ if (unlikely(migrate_disabled_updated(p)))
18546+ return;
18547+
18548+ /*
18549+ * Since this is always current we can get away with only locking
18550+ * rq->lock; the ->cpus_allowed value can normally only be changed
18551+ * while holding both p->pi_lock and rq->lock, but seeing that this
18552+ * is current, we cannot actually be waking up, so all code that
18553+ * relies on serialization against p->pi_lock is out of scope.
18554+ *
18555+ * Having rq->lock serializes us against things like
18556+ * set_cpus_allowed_ptr() that can still happen concurrently.
18557+ */
18558+ mask = tsk_cpus_allowed(p);
18559+
18560+ if (p->sched_class->set_cpus_allowed)
18561+ p->sched_class->set_cpus_allowed(p, mask);
18562+ p->nr_cpus_allowed = cpumask_weight(mask);
18563+
18564+ /* Let migrate_enable know to fix things back up */
18565+ p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
18566+}
18567+
18568+void migrate_disable(void)
18569+{
18570+ struct task_struct *p = current;
18571+
18572+ if (in_atomic()) {
18573+#ifdef CONFIG_SCHED_DEBUG
18574+ p->migrate_disable_atomic++;
18575+#endif
18576+ return;
18577+ }
18578+
18579+#ifdef CONFIG_SCHED_DEBUG
18580+ WARN_ON_ONCE(p->migrate_disable_atomic);
18581+#endif
18582+
18583+ preempt_disable();
18584+ if (p->migrate_disable) {
18585+ p->migrate_disable++;
18586+ preempt_enable();
18587+ return;
18588+ }
18589+
18590+ preempt_lazy_disable();
18591+ pin_current_cpu();
18592+ p->migrate_disable = 1;
18593+ preempt_enable();
18594+}
18595+EXPORT_SYMBOL(migrate_disable);
18596+
18597+void migrate_enable(void)
18598+{
18599+ struct task_struct *p = current;
18600+ const struct cpumask *mask;
18601+ unsigned long flags;
18602+ struct rq *rq;
18603+
18604+ if (in_atomic()) {
18605+#ifdef CONFIG_SCHED_DEBUG
18606+ p->migrate_disable_atomic--;
18607+#endif
18608+ return;
18609+ }
18610+
18611+#ifdef CONFIG_SCHED_DEBUG
18612+ WARN_ON_ONCE(p->migrate_disable_atomic);
18613+#endif
18614+ WARN_ON_ONCE(p->migrate_disable <= 0);
18615+
18616+ preempt_disable();
18617+ if (migrate_disable_count(p) > 1) {
18618+ p->migrate_disable--;
18619+ preempt_enable();
18620+ return;
18621+ }
18622+
18623+ if (unlikely(migrate_disabled_updated(p))) {
18624+ /*
18625+ * Undo whatever update_migrate_disable() did, also see there
18626+ * about locking.
18627+ */
18628+ rq = this_rq();
18629+ raw_spin_lock_irqsave(&rq->lock, flags);
18630+
18631+ /*
18632+ * Clearing migrate_disable causes tsk_cpus_allowed to
18633+ * show the task's original cpu affinity.
18634+ */
18635+ p->migrate_disable = 0;
18636+ mask = tsk_cpus_allowed(p);
18637+ if (p->sched_class->set_cpus_allowed)
18638+ p->sched_class->set_cpus_allowed(p, mask);
18639+ p->nr_cpus_allowed = cpumask_weight(mask);
18640+ raw_spin_unlock_irqrestore(&rq->lock, flags);
18641+ } else
18642+ p->migrate_disable = 0;
18643+
18644+ unpin_current_cpu();
18645+ preempt_enable();
18646+ preempt_lazy_enable();
18647+}
18648+EXPORT_SYMBOL(migrate_enable);
18649+#else
18650+static inline void update_migrate_disable(struct task_struct *p) { }
18651+#define migrate_disabled_updated(p) 0
18652+#endif
18653+
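/*
 * Illustrative sketch (not part of the patch): typical use of the
 * migrate_disable()/migrate_enable() pair exported above - keep the
 * task on its current CPU across a section that may sleep on RT, for
 * example while touching per-CPU data.  'my_counters' is a
 * hypothetical per-CPU variable.
 */
static DEFINE_PER_CPU(unsigned long, my_counters);

static void bump_local_counter(void)
{
        migrate_disable();              /* pin current to this CPU */
        this_cpu_inc(my_counters);      /* safe: we cannot migrate away here */
        migrate_enable();
}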
18654 static void put_prev_task(struct rq *rq, struct task_struct *prev)
18655 {
18656 if (prev->on_rq || rq->skip_clock_update < 0)
18657@@ -2903,6 +3130,8 @@ need_resched:
18658
18659 raw_spin_lock_irq(&rq->lock);
18660
18661+ update_migrate_disable(prev);
18662+
18663 switch_count = &prev->nivcsw;
18664 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
18665 if (unlikely(signal_pending_state(prev->state, prev))) {
18666@@ -2915,8 +3144,10 @@ need_resched:
18667 * If a worker went to sleep, notify and ask workqueue
18668 * whether it wants to wake up a task to maintain
18669 * concurrency.
18670+ * Only call wake up if prev isn't blocked on a sleeping
18671+ * spin lock.
18672 */
18673- if (prev->flags & PF_WQ_WORKER) {
18674+ if (prev->flags & PF_WQ_WORKER && !prev->saved_state) {
18675 struct task_struct *to_wakeup;
18676
18677 to_wakeup = wq_worker_sleeping(prev, cpu);
18678@@ -2935,6 +3166,7 @@ need_resched:
18679 put_prev_task(rq, prev);
18680 next = pick_next_task(rq);
18681 clear_tsk_need_resched(prev);
18682+ clear_tsk_need_resched_lazy(prev);
18683 rq->skip_clock_update = 0;
18684
18685 if (likely(prev != next)) {
18686@@ -3071,9 +3303,26 @@ asmlinkage void __sched notrace preempt_schedule(void)
18687 if (likely(ti->preempt_count || irqs_disabled()))
18688 return;
18689
18690+#ifdef CONFIG_PREEMPT_LAZY
18691+ /*
18692+ * Check for lazy preemption
18693+ */
18694+ if (ti->preempt_lazy_count && !test_thread_flag(TIF_NEED_RESCHED))
18695+ return;
18696+#endif
18697+
18698 do {
18699 add_preempt_count_notrace(PREEMPT_ACTIVE);
18700+ /*
18701+ * The add/subtract must not be traced by the function
18702+ * tracer. But we still want to account for the
18703+ * preempt off latency tracer. Since the _notrace versions
18704+ * of add/subtract skip the accounting for the latency tracer
18705+ * we must force it manually.
18706+ */
18707+ start_critical_timings();
18708 __schedule();
18709+ stop_critical_timings();
18710 sub_preempt_count_notrace(PREEMPT_ACTIVE);
18711
18712 /*
18713@@ -3242,10 +3491,10 @@ void complete(struct completion *x)
18714 {
18715 unsigned long flags;
18716
18717- spin_lock_irqsave(&x->wait.lock, flags);
18718+ raw_spin_lock_irqsave(&x->wait.lock, flags);
18719 x->done++;
18720- __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
18721- spin_unlock_irqrestore(&x->wait.lock, flags);
18722+ __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
18723+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
18724 }
18725 EXPORT_SYMBOL(complete);
18726
18727@@ -3262,10 +3511,10 @@ void complete_all(struct completion *x)
18728 {
18729 unsigned long flags;
18730
18731- spin_lock_irqsave(&x->wait.lock, flags);
18732+ raw_spin_lock_irqsave(&x->wait.lock, flags);
18733 x->done += UINT_MAX/2;
18734- __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
18735- spin_unlock_irqrestore(&x->wait.lock, flags);
18736+ __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
18737+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
18738 }
18739 EXPORT_SYMBOL(complete_all);
18740
18741@@ -3273,20 +3522,20 @@ static inline long __sched
18742 do_wait_for_common(struct completion *x, long timeout, int state)
18743 {
18744 if (!x->done) {
18745- DECLARE_WAITQUEUE(wait, current);
18746+ DEFINE_SWAITER(wait);
18747
18748- __add_wait_queue_tail_exclusive(&x->wait, &wait);
18749+ swait_prepare_locked(&x->wait, &wait);
18750 do {
18751 if (signal_pending_state(state, current)) {
18752 timeout = -ERESTARTSYS;
18753 break;
18754 }
18755 __set_current_state(state);
18756- spin_unlock_irq(&x->wait.lock);
18757+ raw_spin_unlock_irq(&x->wait.lock);
18758 timeout = schedule_timeout(timeout);
18759- spin_lock_irq(&x->wait.lock);
18760+ raw_spin_lock_irq(&x->wait.lock);
18761 } while (!x->done && timeout);
18762- __remove_wait_queue(&x->wait, &wait);
18763+ swait_finish_locked(&x->wait, &wait);
18764 if (!x->done)
18765 return timeout;
18766 }
18767@@ -3299,9 +3548,9 @@ wait_for_common(struct completion *x, long timeout, int state)
18768 {
18769 might_sleep();
18770
18771- spin_lock_irq(&x->wait.lock);
18772+ raw_spin_lock_irq(&x->wait.lock);
18773 timeout = do_wait_for_common(x, timeout, state);
18774- spin_unlock_irq(&x->wait.lock);
18775+ raw_spin_unlock_irq(&x->wait.lock);
18776 return timeout;
18777 }
18778
18779@@ -3432,12 +3681,12 @@ bool try_wait_for_completion(struct completion *x)
18780 unsigned long flags;
18781 int ret = 1;
18782
18783- spin_lock_irqsave(&x->wait.lock, flags);
18784+ raw_spin_lock_irqsave(&x->wait.lock, flags);
18785 if (!x->done)
18786 ret = 0;
18787 else
18788 x->done--;
18789- spin_unlock_irqrestore(&x->wait.lock, flags);
18790+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
18791 return ret;
18792 }
18793 EXPORT_SYMBOL(try_wait_for_completion);
18794@@ -3455,10 +3704,10 @@ bool completion_done(struct completion *x)
18795 unsigned long flags;
18796 int ret = 1;
18797
18798- spin_lock_irqsave(&x->wait.lock, flags);
18799+ raw_spin_lock_irqsave(&x->wait.lock, flags);
18800 if (!x->done)
18801 ret = 0;
18802- spin_unlock_irqrestore(&x->wait.lock, flags);
18803+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
18804 return ret;
18805 }
18806 EXPORT_SYMBOL(completion_done);
18807@@ -3519,7 +3768,8 @@ EXPORT_SYMBOL(sleep_on_timeout);
18808 * This function changes the 'effective' priority of a task. It does
18809 * not touch ->normal_prio like __setscheduler().
18810 *
18811- * Used by the rt_mutex code to implement priority inheritance logic.
18812+ * Used by the rt_mutex code to implement priority inheritance
18813+ * logic. Call site only calls if the priority of the task changed.
18814 */
18815 void rt_mutex_setprio(struct task_struct *p, int prio)
18816 {
18817@@ -3742,20 +3992,25 @@ static struct task_struct *find_process_by_pid(pid_t pid)
18818 return pid ? find_task_by_vpid(pid) : current;
18819 }
18820
18821-/* Actually do priority change: must hold rq lock. */
18822-static void
18823-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
18824+static void __setscheduler_params(struct task_struct *p, int policy, int prio)
18825 {
18826 p->policy = policy;
18827 p->rt_priority = prio;
18828 p->normal_prio = normal_prio(p);
18829+ set_load_weight(p);
18830+}
18831+
18832+/* Actually do priority change: must hold rq lock. */
18833+static void
18834+__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
18835+{
18836+ __setscheduler_params(p, policy, prio);
18837 /* we are holding p->pi_lock already */
18838 p->prio = rt_mutex_getprio(p);
18839 if (rt_prio(p->prio))
18840 p->sched_class = &rt_sched_class;
18841 else
18842 p->sched_class = &fair_sched_class;
18843- set_load_weight(p);
18844 }
18845
18846 /*
18847@@ -3777,6 +4032,7 @@ static bool check_same_owner(struct task_struct *p)
18848 static int __sched_setscheduler(struct task_struct *p, int policy,
18849 const struct sched_param *param, bool user)
18850 {
18851+ int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
18852 int retval, oldprio, oldpolicy = -1, on_rq, running;
18853 unsigned long flags;
18854 const struct sched_class *prev_class;
18855@@ -3872,10 +4128,13 @@ recheck:
18856 }
18857
18858 /*
18859- * If not changing anything there's no need to proceed further:
18860+ * If not changing anything there's no need to proceed
18861+ * further, but store a possible modification of
18862+ * reset_on_fork.
18863 */
18864 if (unlikely(policy == p->policy && (!rt_policy(policy) ||
18865 param->sched_priority == p->rt_priority))) {
18866+ p->sched_reset_on_fork = reset_on_fork;
18867 task_rq_unlock(rq, p, &flags);
18868 return 0;
18869 }
18870@@ -3901,6 +4160,25 @@ recheck:
18871 task_rq_unlock(rq, p, &flags);
18872 goto recheck;
18873 }
18874+
18875+ p->sched_reset_on_fork = reset_on_fork;
18876+ oldprio = p->prio;
18877+
18878+ /*
18879+ * Special case for priority boosted tasks.
18880+ *
18881+ * If the new priority is lower than or equal to the current
18882+ * (boosted) priority (user space view), we just store the new
18883+ * normal parameters and do not touch the scheduler class and
18884+ * the runqueue. This will be done when the task deboosts
18885+ * itself.
18886+ */
18887+ if (rt_mutex_check_prio(p, newprio)) {
18888+ __setscheduler_params(p, policy, param->sched_priority);
18889+ task_rq_unlock(rq, p, &flags);
18890+ return 0;
18891+ }
18892+
18893 on_rq = p->on_rq;
18894 running = task_current(rq, p);
18895 if (on_rq)
18896@@ -3908,17 +4186,18 @@ recheck:
18897 if (running)
18898 p->sched_class->put_prev_task(rq, p);
18899
18900- p->sched_reset_on_fork = reset_on_fork;
18901-
18902- oldprio = p->prio;
18903 prev_class = p->sched_class;
18904 __setscheduler(rq, p, policy, param->sched_priority);
18905
18906 if (running)
18907 p->sched_class->set_curr_task(rq);
18908- if (on_rq)
18909- enqueue_task(rq, p, 0);
18910-
18911+ if (on_rq) {
18912+ /*
18913+ * We enqueue to tail when the priority of a task is
18914+ * increased (user space view).
18915+ */
18916+ enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
18917+ }
18918 check_class_changed(rq, p, prev_class, oldprio);
18919 task_rq_unlock(rq, p, &flags);
18920
18921@@ -4270,9 +4549,17 @@ static inline int should_resched(void)
18922
18923 static void __cond_resched(void)
18924 {
18925- add_preempt_count(PREEMPT_ACTIVE);
18926- __schedule();
18927- sub_preempt_count(PREEMPT_ACTIVE);
18928+ do {
18929+ add_preempt_count(PREEMPT_ACTIVE);
18930+ __schedule();
18931+ sub_preempt_count(PREEMPT_ACTIVE);
18932+ /*
18933+ * Check again in case we missed a preemption
18934+ * opportunity between schedule and now.
18935+ */
18936+ barrier();
18937+
18938+ } while (need_resched());
18939 }
18940
18941 int __sched _cond_resched(void)
18942@@ -4313,6 +4600,7 @@ int __cond_resched_lock(spinlock_t *lock)
18943 }
18944 EXPORT_SYMBOL(__cond_resched_lock);
18945
18946+#ifndef CONFIG_PREEMPT_RT_FULL
18947 int __sched __cond_resched_softirq(void)
18948 {
18949 BUG_ON(!in_softirq());
18950@@ -4326,6 +4614,7 @@ int __sched __cond_resched_softirq(void)
18951 return 0;
18952 }
18953 EXPORT_SYMBOL(__cond_resched_softirq);
18954+#endif
18955
18956 /**
18957 * yield - yield the current processor to other threads.
18958@@ -4656,6 +4945,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
18959 rcu_read_unlock();
18960
18961 rq->curr = rq->idle = idle;
18962+ idle->on_rq = 1;
18963 #if defined(CONFIG_SMP)
18964 idle->on_cpu = 1;
18965 #endif
18966@@ -4663,7 +4953,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
18967
18968 /* Set the preempt count _outside_ the spinlocks! */
18969 task_thread_info(idle)->preempt_count = 0;
18970-
18971+#ifdef CONFIG_HAVE_PREEMPT_LAZY
18972+ task_thread_info(idle)->preempt_lazy_count = 0;
18973+#endif
18974 /*
18975 * The idle tasks have their own, simple scheduling class:
18976 */
18977@@ -4677,11 +4969,90 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
18978 #ifdef CONFIG_SMP
18979 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
18980 {
18981- if (p->sched_class && p->sched_class->set_cpus_allowed)
18982- p->sched_class->set_cpus_allowed(p, new_mask);
18983-
18984+ if (!migrate_disabled_updated(p)) {
18985+ if (p->sched_class && p->sched_class->set_cpus_allowed)
18986+ p->sched_class->set_cpus_allowed(p, new_mask);
18987+ p->nr_cpus_allowed = cpumask_weight(new_mask);
18988+ }
18989 cpumask_copy(&p->cpus_allowed, new_mask);
18990- p->nr_cpus_allowed = cpumask_weight(new_mask);
18991+}
18992+
18993+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
18994+static DEFINE_MUTEX(sched_down_mutex);
18995+static cpumask_t sched_down_cpumask;
18996+
18997+void tell_sched_cpu_down_begin(int cpu)
18998+{
18999+ mutex_lock(&sched_down_mutex);
19000+ cpumask_set_cpu(cpu, &sched_down_cpumask);
19001+ mutex_unlock(&sched_down_mutex);
19002+}
19003+
19004+void tell_sched_cpu_down_done(int cpu)
19005+{
19006+ mutex_lock(&sched_down_mutex);
19007+ cpumask_clear_cpu(cpu, &sched_down_cpumask);
19008+ mutex_unlock(&sched_down_mutex);
19009+}
19010+
19011+/**
19012+ * migrate_me - try to move the current task off this cpu
19013+ *
19014+ * Used by the pin_current_cpu() code to try to get tasks
19015+ * to move off the current CPU as it is going down.
19016+ * It will only move the task if the task isn't pinned to
19017+ * the CPU (with migrate_disable, affinity or THREAD_BOUND)
19018+ * and the task is in a RUNNING state. Otherwise the
19019+ * movement of the task will wake it up (change its state
19020+ * to running) when the task did not expect it.
19021+ *
19022+ * Returns 1 if it succeeded in moving the current task
19023+ * 0 otherwise.
19024+ */
19025+int migrate_me(void)
19026+{
19027+ struct task_struct *p = current;
19028+ struct migration_arg arg;
19029+ struct cpumask *cpumask;
19030+ struct cpumask *mask;
19031+ unsigned long flags;
19032+ unsigned int dest_cpu;
19033+ struct rq *rq;
19034+
19035+ /*
19036+ * We cannot migrate tasks bound to a CPU or tasks not
19037+ * running. The movement of the task will wake it up.
19038+ */
19039+ if (p->flags & PF_THREAD_BOUND || p->state)
19040+ return 0;
19041+
19042+ mutex_lock(&sched_down_mutex);
19043+ rq = task_rq_lock(p, &flags);
19044+
19045+ cpumask = &__get_cpu_var(sched_cpumasks);
19046+ mask = &p->cpus_allowed;
19047+
19048+ cpumask_andnot(cpumask, mask, &sched_down_cpumask);
19049+
19050+ if (!cpumask_weight(cpumask)) {
19051+ /* It's only on this CPU? */
19052+ task_rq_unlock(rq, p, &flags);
19053+ mutex_unlock(&sched_down_mutex);
19054+ return 0;
19055+ }
19056+
19057+ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
19058+
19059+ arg.task = p;
19060+ arg.dest_cpu = dest_cpu;
19061+
19062+ task_rq_unlock(rq, p, &flags);
19063+
19064+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
19065+ tlb_migrate_finish(p->mm);
19066+ mutex_unlock(&sched_down_mutex);
19067+
19068+ return 1;
19069 }
19070
19071 /*
19072@@ -4732,7 +5103,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
19073 do_set_cpus_allowed(p, new_mask);
19074
19075 /* Can the task run on the task's current CPU? If so, we're done */
19076- if (cpumask_test_cpu(task_cpu(p), new_mask))
19077+ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
19078 goto out;
19079
19080 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
19081@@ -4821,6 +5192,8 @@ static int migration_cpu_stop(void *data)
19082
19083 #ifdef CONFIG_HOTPLUG_CPU
19084
19085+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
19086+
19087 /*
19088 * Ensures that the idle task is using init_mm right before its cpu goes
19089 * offline.
19090@@ -4833,7 +5206,12 @@ void idle_task_exit(void)
19091
19092 if (mm != &init_mm)
19093 switch_mm(mm, &init_mm, current);
19094- mmdrop(mm);
19095+
19096+ /*
19097+ * Defer the cleanup to an alive cpu. On RT we can neither
19098+ * call mmdrop() nor mmdrop_delayed() from here.
19099+ */
19100+ per_cpu(idle_last_mm, smp_processor_id()) = mm;
19101 }
19102
19103 /*
19104@@ -5150,6 +5528,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
19105
19106 case CPU_DEAD:
19107 calc_load_migrate(rq);
19108+ if (per_cpu(idle_last_mm, cpu)) {
19109+ mmdrop(per_cpu(idle_last_mm, cpu));
19110+ per_cpu(idle_last_mm, cpu) = NULL;
19111+ }
19112 break;
19113 #endif
19114 }
19115@@ -7002,7 +7384,8 @@ void __init sched_init(void)
19116 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
19117 static inline int preempt_count_equals(int preempt_offset)
19118 {
19119- int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
19120+ int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
19121+ sched_rcu_preempt_depth();
19122
19123 return (nested == preempt_offset);
19124 }
19125@@ -7012,7 +7395,8 @@ void __might_sleep(const char *file, int line, int preempt_offset)
19126 static unsigned long prev_jiffy; /* ratelimiting */
19127
19128 rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
19129- if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
19130+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
19131+ !is_idle_task(current)) ||
19132 system_state != SYSTEM_RUNNING || oops_in_progress)
19133 return;
19134 if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
19135@@ -7030,6 +7414,13 @@ void __might_sleep(const char *file, int line, int preempt_offset)
19136 debug_show_held_locks(current);
19137 if (irqs_disabled())
19138 print_irqtrace_events(current);
19139+#ifdef CONFIG_DEBUG_PREEMPT
19140+ if (!preempt_count_equals(preempt_offset)) {
19141+ pr_err("Preemption disabled at:");
19142+ print_ip_sym(current->preempt_disable_ip);
19143+ pr_cont("\n");
19144+ }
19145+#endif
19146 dump_stack();
19147 }
19148 EXPORT_SYMBOL(__might_sleep);
19149diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
19150index 7ae4c4c..2cac500 100644
19151--- a/kernel/sched/debug.c
19152+++ b/kernel/sched/debug.c
19153@@ -253,6 +253,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
19154 P(rt_throttled);
19155 PN(rt_time);
19156 PN(rt_runtime);
19157+#ifdef CONFIG_SMP
19158+ P(rt_nr_migratory);
19159+#endif
19160
19161 #undef PN
19162 #undef P
19163@@ -507,6 +510,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
19164 P(se.load.weight);
19165 P(policy);
19166 P(prio);
19167+#ifdef CONFIG_PREEMPT_RT_FULL
19168+ P(migrate_disable);
19169+#endif
19170+ P(nr_cpus_allowed);
19171 #undef PN
19172 #undef __PN
19173 #undef P
19174diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
19175index 81fa536..392fcf3 100644
19176--- a/kernel/sched/fair.c
19177+++ b/kernel/sched/fair.c
19178@@ -1827,7 +1827,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
19179 ideal_runtime = sched_slice(cfs_rq, curr);
19180 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
19181 if (delta_exec > ideal_runtime) {
19182- resched_task(rq_of(cfs_rq)->curr);
19183+ resched_task_lazy(rq_of(cfs_rq)->curr);
19184 /*
19185 * The current task ran long enough, ensure it doesn't get
19186 * re-elected due to buddy favours.
19187@@ -1851,7 +1851,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
19188 return;
19189
19190 if (delta > ideal_runtime)
19191- resched_task(rq_of(cfs_rq)->curr);
19192+ resched_task_lazy(rq_of(cfs_rq)->curr);
19193 }
19194
19195 static void
19196@@ -1971,7 +1971,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
19197 * validating it and just reschedule.
19198 */
19199 if (queued) {
19200- resched_task(rq_of(cfs_rq)->curr);
19201+ resched_task_lazy(rq_of(cfs_rq)->curr);
19202 return;
19203 }
19204 /*
19205@@ -2160,7 +2160,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
19206 * hierarchy can be throttled
19207 */
19208 if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
19209- resched_task(rq_of(cfs_rq)->curr);
19210+ resched_task_lazy(rq_of(cfs_rq)->curr);
19211 }
19212
19213 static __always_inline
19214@@ -2745,7 +2745,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
19215
19216 if (delta < 0) {
19217 if (rq->curr == p)
19218- resched_task(p);
19219+ resched_task_lazy(p);
19220 return;
19221 }
19222
19223@@ -3577,7 +3577,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
19224 return;
19225
19226 preempt:
19227- resched_task(curr);
19228+ resched_task_lazy(curr);
19229 /*
19230 * Only set the backward buddy when the current task is still
19231 * on the rq. This can happen when a wakeup gets interleaved
19232@@ -5772,7 +5772,7 @@ static void task_fork_fair(struct task_struct *p)
19233 * 'current' within the tree based on its new key value.
19234 */
19235 swap(curr->vruntime, se->vruntime);
19236- resched_task(rq->curr);
19237+ resched_task_lazy(rq->curr);
19238 }
19239
19240 se->vruntime -= cfs_rq->min_vruntime;
19241@@ -5797,7 +5797,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
19242 */
19243 if (rq->curr == p) {
19244 if (p->prio > oldprio)
19245- resched_task(rq->curr);
19246+ resched_task_lazy(rq->curr);
19247 } else
19248 check_preempt_curr(rq, p, 0);
19249 }
19250diff --git a/kernel/sched/features.h b/kernel/sched/features.h
19251index 1ad1d2b..771b529 100644
19252--- a/kernel/sched/features.h
19253+++ b/kernel/sched/features.h
19254@@ -57,11 +57,18 @@ SCHED_FEAT(OWNER_SPIN, true)
19255 */
19256 SCHED_FEAT(NONTASK_POWER, true)
19257
19258+#ifndef CONFIG_PREEMPT_RT_FULL
19259 /*
19260 * Queue remote wakeups on the target CPU and process them
19261 * using the scheduler IPI. Reduces rq->lock contention/bounces.
19262 */
19263 SCHED_FEAT(TTWU_QUEUE, true)
19264+#else
19265+SCHED_FEAT(TTWU_QUEUE, false)
19266+# ifdef CONFIG_PREEMPT_LAZY
19267+SCHED_FEAT(PREEMPT_LAZY, true)
19268+# endif
19269+#endif
19270
19271 SCHED_FEAT(FORCE_SD_OVERLAP, false)
19272 SCHED_FEAT(RT_RUNTIME_SHARE, true)
19273diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
19274index 4f02b28..46faf69 100644
19275--- a/kernel/sched/rt.c
19276+++ b/kernel/sched/rt.c
19277@@ -41,6 +41,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
19278
19279 hrtimer_init(&rt_b->rt_period_timer,
19280 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
19281+ rt_b->rt_period_timer.irqsafe = 1;
19282 rt_b->rt_period_timer.function = sched_rt_period_timer;
19283 }
19284
19285diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
19286index fc88644..d055951 100644
19287--- a/kernel/sched/sched.h
19288+++ b/kernel/sched/sched.h
19289@@ -897,6 +897,15 @@ extern void init_sched_fair_class(void);
19290 extern void resched_task(struct task_struct *p);
19291 extern void resched_cpu(int cpu);
19292
19293+#ifdef CONFIG_PREEMPT_LAZY
19294+extern void resched_task_lazy(struct task_struct *tsk);
19295+#else
19296+static inline void resched_task_lazy(struct task_struct *tsk)
19297+{
19298+ resched_task(tsk);
19299+}
19300+#endif
19301+
19302 extern struct rt_bandwidth def_rt_bandwidth;
19303 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
19304
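
Taken together, the fair.c, features.h and sched.h hunks above route most tick- and wakeup-driven reschedules through resched_task_lazy(): with CONFIG_PREEMPT_LAZY it only requests a reschedule at the next preemption point instead of forcing one immediately, and without it the inline fallback degrades to plain resched_task(). A minimal userspace sketch of that two-flag idea follows; it assumes nothing about the real implementation beyond the split between an immediate and a lazy request, and every name in it is illustrative.

/* lazy_resched_model.c - toy model of immediate vs. "lazy" reschedule requests.
 * Userspace illustration only, not kernel code.
 * Build: cc -std=c11 lazy_resched_model.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool need_resched;        /* honoured at every check */
static atomic_bool need_resched_lazy;   /* honoured only at preemption points */

static void resched_task(void)      { atomic_store(&need_resched, true); }
static void resched_task_lazy(void) { atomic_store(&need_resched_lazy, true); }

/* Checked on every "tick": only the immediate flag forces preemption here. */
static bool should_preempt_now(void)
{
    return atomic_load(&need_resched);
}

/* Checked only at explicit preemption points (cond_resched-like). */
static bool should_preempt_at_preemption_point(void)
{
    return atomic_load(&need_resched) || atomic_load(&need_resched_lazy);
}

int main(void)
{
    resched_task_lazy();
    printf("lazy request pending, preempt now? %d\n", should_preempt_now());
    printf("at a preemption point?            %d\n", should_preempt_at_preemption_point());
    resched_task();
    printf("immediate request set, preempt now? %d\n", should_preempt_now());
    return 0;
}
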
19305diff --git a/kernel/signal.c b/kernel/signal.c
19306index 50e425c..03b6e8f 100644
19307--- a/kernel/signal.c
19308+++ b/kernel/signal.c
19309@@ -348,13 +348,45 @@ static bool task_participate_group_stop(struct task_struct *task)
19310 return false;
19311 }
19312
19313+#ifdef __HAVE_ARCH_CMPXCHG
19314+static inline struct sigqueue *get_task_cache(struct task_struct *t)
19315+{
19316+ struct sigqueue *q = t->sigqueue_cache;
19317+
19318+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
19319+ return NULL;
19320+ return q;
19321+}
19322+
19323+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
19324+{
19325+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
19326+ return 0;
19327+ return 1;
19328+}
19329+
19330+#else
19331+
19332+static inline struct sigqueue *get_task_cache(struct task_struct *t)
19333+{
19334+ return NULL;
19335+}
19336+
19337+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
19338+{
19339+ return 1;
19340+}
19341+
19342+#endif
19343+
19344 /*
19345 * allocate a new signal queue record
19346 * - this may be called without locks if and only if t == current, otherwise an
19347 * appropriate lock must be held to stop the target task from exiting
19348 */
19349 static struct sigqueue *
19350-__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
19351+__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
19352+ int override_rlimit, int fromslab)
19353 {
19354 struct sigqueue *q = NULL;
19355 struct user_struct *user;
19356@@ -371,7 +403,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
19357 if (override_rlimit ||
19358 atomic_read(&user->sigpending) <=
19359 task_rlimit(t, RLIMIT_SIGPENDING)) {
19360- q = kmem_cache_alloc(sigqueue_cachep, flags);
19361+ if (!fromslab)
19362+ q = get_task_cache(t);
19363+ if (!q)
19364+ q = kmem_cache_alloc(sigqueue_cachep, flags);
19365 } else {
19366 print_dropped_signal(sig);
19367 }
19368@@ -388,6 +423,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
19369 return q;
19370 }
19371
19372+static struct sigqueue *
19373+__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
19374+ int override_rlimit)
19375+{
19376+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
19377+}
19378+
19379 static void __sigqueue_free(struct sigqueue *q)
19380 {
19381 if (q->flags & SIGQUEUE_PREALLOC)
19382@@ -397,6 +439,21 @@ static void __sigqueue_free(struct sigqueue *q)
19383 kmem_cache_free(sigqueue_cachep, q);
19384 }
19385
19386+static void sigqueue_free_current(struct sigqueue *q)
19387+{
19388+ struct user_struct *up;
19389+
19390+ if (q->flags & SIGQUEUE_PREALLOC)
19391+ return;
19392+
19393+ up = q->user;
19394+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
19395+ atomic_dec(&up->sigpending);
19396+ free_uid(up);
19397+ } else
19398+ __sigqueue_free(q);
19399+}
19400+
19401 void flush_sigqueue(struct sigpending *queue)
19402 {
19403 struct sigqueue *q;
19404@@ -410,6 +467,21 @@ void flush_sigqueue(struct sigpending *queue)
19405 }
19406
19407 /*
19408+ * Called from __exit_signal. Flush tsk->pending and
19409+ * tsk->sigqueue_cache
19410+ */
19411+void flush_task_sigqueue(struct task_struct *tsk)
19412+{
19413+ struct sigqueue *q;
19414+
19415+ flush_sigqueue(&tsk->pending);
19416+
19417+ q = get_task_cache(tsk);
19418+ if (q)
19419+ kmem_cache_free(sigqueue_cachep, q);
19420+}
19421+
19422+/*
19423 * Flush all pending signals for a task.
19424 */
19425 void __flush_signals(struct task_struct *t)
19426@@ -561,7 +633,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
19427 still_pending:
19428 list_del_init(&first->list);
19429 copy_siginfo(info, &first->info);
19430- __sigqueue_free(first);
19431+ sigqueue_free_current(first);
19432 } else {
19433 /*
19434 * Ok, it wasn't in the queue. This must be
19435@@ -607,6 +679,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
19436 {
19437 int signr;
19438
19439+ WARN_ON_ONCE(tsk != current);
19440+
19441 /* We only dequeue private signals from ourselves, we don't let
19442 * signalfd steal them
19443 */
19444@@ -1228,8 +1302,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
19445 * We don't want to have recursive SIGSEGV's etc, for example,
19446 * that is why we also clear SIGNAL_UNKILLABLE.
19447 */
19448-int
19449-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
19450+static int
19451+do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
19452 {
19453 unsigned long int flags;
19454 int ret, blocked, ignored;
19455@@ -1254,6 +1328,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
19456 return ret;
19457 }
19458
19459+int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
19460+{
19461+/*
19462+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
19463+ * since it cannot enable preemption, and the signal code's spin_locks
19464+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
19465+ * send the signal on exit of the trap.
19466+ */
19467+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
19468+ if (in_atomic()) {
19469+ if (WARN_ON_ONCE(t != current))
19470+ return 0;
19471+ if (WARN_ON_ONCE(t->forced_info.si_signo))
19472+ return 0;
19473+
19474+ if (is_si_special(info)) {
19475+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
19476+ t->forced_info.si_signo = sig;
19477+ t->forced_info.si_errno = 0;
19478+ t->forced_info.si_code = SI_KERNEL;
19479+ t->forced_info.si_pid = 0;
19480+ t->forced_info.si_uid = 0;
19481+ } else {
19482+ t->forced_info = *info;
19483+ }
19484+
19485+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
19486+ return 0;
19487+ }
19488+#endif
19489+ return do_force_sig_info(sig, info, t);
19490+}
19491+
19492 /*
19493 * Nuke all other threads in the group.
19494 */
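
The force_sig_info() change above adds an RT-only deferral path: when a signal has to be forced from an atomic (trap) context, where the siglock has become a sleeping lock, the siginfo is parked in the task's forced_info and TIF_NOTIFY_RESUME is set, so the signal is actually delivered on the way back to user space. Below is a small userspace sketch of that "stash now, deliver at a safe point" shape; the struct and function names are hypothetical stand-ins, not the kernel's.

/* deferred_signal_model.c - stash a notification, deliver it at a safe point.
 * Build: cc -std=c11 deferred_signal_model.c
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_siginfo { int signo; };

struct task {
    bool in_atomic;                 /* cannot deliver right now */
    bool notify_resume;             /* TIF_NOTIFY_RESUME stand-in */
    struct fake_siginfo forced_info;
};

static void deliver_now(const struct fake_siginfo *info)
{
    printf("delivering signal %d immediately\n", info->signo);
}

/* Mirrors the shape of the patched force_sig_info(): defer when atomic. */
static void force_sig(struct task *t, const struct fake_siginfo *info)
{
    if (t->in_atomic) {
        t->forced_info = *info;     /* remember what to send */
        t->notify_resume = true;    /* ask to be called back at a safe point */
        return;
    }
    deliver_now(info);
}

/* The "return to user space" hook that flushes the deferred signal. */
static void exit_to_user(struct task *t)
{
    if (t->notify_resume) {
        t->notify_resume = false;
        printf("delivering deferred signal %d on exit\n", t->forced_info.signo);
    }
}

int main(void)
{
    struct task t = { .in_atomic = true };
    struct fake_siginfo si = { .signo = 11 };

    force_sig(&t, &si);             /* trap context: gets deferred */
    t.in_atomic = false;
    exit_to_user(&t);               /* delivered here */
    return 0;
}
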
19495@@ -1284,12 +1391,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
19496 struct sighand_struct *sighand;
19497
19498 for (;;) {
19499- local_irq_save(*flags);
19500+ local_irq_save_nort(*flags);
19501 rcu_read_lock();
19502 sighand = rcu_dereference(tsk->sighand);
19503 if (unlikely(sighand == NULL)) {
19504 rcu_read_unlock();
19505- local_irq_restore(*flags);
19506+ local_irq_restore_nort(*flags);
19507 break;
19508 }
19509
19510@@ -1300,7 +1407,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
19511 }
19512 spin_unlock(&sighand->siglock);
19513 rcu_read_unlock();
19514- local_irq_restore(*flags);
19515+ local_irq_restore_nort(*flags);
19516 }
19517
19518 return sighand;
19519@@ -1545,7 +1652,8 @@ EXPORT_SYMBOL(kill_pid);
19520 */
19521 struct sigqueue *sigqueue_alloc(void)
19522 {
19523- struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
19524+	/* Preallocated sigqueue objects always come from the slab cache! */
19525+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
19526
19527 if (q)
19528 q->flags |= SIGQUEUE_PREALLOC;
19529@@ -1902,15 +2010,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
19530 if (gstop_done && ptrace_reparented(current))
19531 do_notify_parent_cldstop(current, false, why);
19532
19533- /*
19534- * Don't want to allow preemption here, because
19535- * sys_ptrace() needs this task to be inactive.
19536- *
19537- * XXX: implement read_unlock_no_resched().
19538- */
19539- preempt_disable();
19540 read_unlock(&tasklist_lock);
19541- preempt_enable_no_resched();
19542 freezable_schedule();
19543 } else {
19544 /*
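
The other core piece of the signal.c changes is the one-entry per-task sigqueue cache: get_task_cache()/put_task_cache() claim and release the slot with cmpxchg, so a realtime task can recycle one sigqueue on the delivery fast path without going back to the slab allocator or taking any lock. Here is a minimal userspace model of that single-slot compare-and-swap cache, using C11 atomics in place of the kernel's cmpxchg; the names are placeholders, not the kernel API.

/* one_slot_cache.c - lock-free single-slot cache, as in get/put_task_cache().
 * Build: cc -std=c11 one_slot_cache.c
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sigqueue { int signo; };

static _Atomic(struct sigqueue *) cache_slot;   /* one slot per task in the kernel */

/* Take the cached object, or NULL if the slot is empty or we lost a race. */
static struct sigqueue *cache_get(void)
{
    struct sigqueue *q = atomic_load(&cache_slot);

    if (!q || !atomic_compare_exchange_strong(&cache_slot, &q, NULL))
        return NULL;
    return q;
}

/* Returns 0 if the object was parked in the cache, 1 if the caller must free it. */
static int cache_put(struct sigqueue *q)
{
    struct sigqueue *expected = NULL;

    return atomic_compare_exchange_strong(&cache_slot, &expected, q) ? 0 : 1;
}

int main(void)
{
    struct sigqueue *q = malloc(sizeof(*q));

    if (cache_put(q) == 0)
        printf("parked in the cache instead of freeing\n");
    struct sigqueue *again = cache_get();
    printf("fast path got %s object back\n", again == q ? "the same" : "a different");
    free(again ? again : q);
    return 0;
}

In the patch the cached slot is only used when the freeing task has a realtime priority, which is exactly the case where avoiding the allocator on the signal path matters most.
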
19545diff --git a/kernel/softirq.c b/kernel/softirq.c
19546index ed567ba..8447c8d 100644
19547--- a/kernel/softirq.c
19548+++ b/kernel/softirq.c
19549@@ -21,10 +21,12 @@
19550 #include <linux/freezer.h>
19551 #include <linux/kthread.h>
19552 #include <linux/rcupdate.h>
19553+#include <linux/delay.h>
19554 #include <linux/ftrace.h>
19555 #include <linux/smp.h>
19556 #include <linux/smpboot.h>
19557 #include <linux/tick.h>
19558+#include <linux/locallock.h>
19559
19560 #define CREATE_TRACE_POINTS
19561 #include <trace/events/irq.h>
19562@@ -62,6 +64,98 @@ char *softirq_to_name[NR_SOFTIRQS] = {
19563 "TASKLET", "SCHED", "HRTIMER", "RCU"
19564 };
19565
19566+#ifdef CONFIG_NO_HZ
19567+# ifdef CONFIG_PREEMPT_RT_FULL
19568+
19569+struct softirq_runner {
19570+ struct task_struct *runner[NR_SOFTIRQS];
19571+};
19572+
19573+static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
19574+
19575+static inline void softirq_set_runner(unsigned int sirq)
19576+{
19577+ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
19578+
19579+ sr->runner[sirq] = current;
19580+}
19581+
19582+static inline void softirq_clr_runner(unsigned int sirq)
19583+{
19584+ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
19585+
19586+ sr->runner[sirq] = NULL;
19587+}
19588+
19589+/*
19590+ * On preempt-rt a softirq running context might be blocked on a
19591+ * lock. There might be no other runnable task on this CPU because the
19592+ * lock owner runs on some other CPU. So we have to go into idle with
19593+ * the pending bit set. Therefore we need to check this, otherwise we
19594+ * warn about false positives, which confuses users and defeats the
19595+ * whole purpose of this test.
19596+ *
19597+ * This code is called with interrupts disabled.
19598+ */
19599+void softirq_check_pending_idle(void)
19600+{
19601+ static int rate_limit;
19602+ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
19603+ u32 warnpending;
19604+ int i;
19605+
19606+ if (rate_limit >= 10)
19607+ return;
19608+
19609+ warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
19610+ for (i = 0; i < NR_SOFTIRQS; i++) {
19611+ struct task_struct *tsk = sr->runner[i];
19612+
19613+ /*
19614+ * The wakeup code in rtmutex.c wakes up the task
19615+ * _before_ it sets pi_blocked_on to NULL under
19616+ * tsk->pi_lock. So we need to check for both: state
19617+ * and pi_blocked_on.
19618+ */
19619+ if (tsk) {
19620+ raw_spin_lock(&tsk->pi_lock);
19621+ if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
19622+ /* Clear all bits pending in that task */
19623+ warnpending &= ~(tsk->softirqs_raised);
19624+ warnpending &= ~(1 << i);
19625+ }
19626+ raw_spin_unlock(&tsk->pi_lock);
19627+ }
19628+ }
19629+
19630+ if (warnpending) {
19631+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
19632+ warnpending);
19633+ rate_limit++;
19634+ }
19635+}
19636+# else
19637+/*
19638+ * On !PREEMPT_RT we just printk rate limited:
19639+ */
19640+void softirq_check_pending_idle(void)
19641+{
19642+ static int rate_limit;
19643+
19644+ if (rate_limit < 10 &&
19645+ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
19646+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
19647+ local_softirq_pending());
19648+ rate_limit++;
19649+ }
19650+}
19651+# endif
19652+
19653+#else /* !NO_HZ */
19654+static inline void softirq_set_runner(unsigned int sirq) { }
19655+static inline void softirq_clr_runner(unsigned int sirq) { }
19656+#endif
19657+
19658 /*
19659 * we cannot loop indefinitely here to avoid userspace starvation,
19660 * but we also don't want to introduce a worst case 1/HZ latency
19661@@ -77,6 +171,57 @@ static void wakeup_softirqd(void)
19662 wake_up_process(tsk);
19663 }
19664
19665+static void handle_softirq(unsigned int vec_nr, int cpu, int need_rcu_bh_qs)
19666+{
19667+ struct softirq_action *h = softirq_vec + vec_nr;
19668+ unsigned int prev_count = preempt_count();
19669+
19670+ kstat_incr_softirqs_this_cpu(vec_nr);
19671+ trace_softirq_entry(vec_nr);
19672+ h->action(h);
19673+ trace_softirq_exit(vec_nr);
19674+
19675+ if (unlikely(prev_count != preempt_count())) {
19676+ pr_err("softirq %u %s %p preempt count leak: %08x -> %08x\n",
19677+ vec_nr, softirq_to_name[vec_nr], h->action,
19678+ prev_count, (unsigned int) preempt_count());
19679+ preempt_count() = prev_count;
19680+ }
19681+ if (need_rcu_bh_qs)
19682+ rcu_bh_qs(cpu);
19683+}
19684+
19685+#ifndef CONFIG_PREEMPT_RT_FULL
19686+static inline int ksoftirqd_softirq_pending(void)
19687+{
19688+ return local_softirq_pending();
19689+}
19690+
19691+static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
19692+{
19693+ unsigned int vec_nr;
19694+
19695+ local_irq_enable();
19696+ for (vec_nr = 0; pending; vec_nr++, pending >>= 1) {
19697+ if (pending & 1)
19698+ handle_softirq(vec_nr, cpu, need_rcu_bh_qs);
19699+ }
19700+ local_irq_disable();
19701+}
19702+
19703+static void run_ksoftirqd(unsigned int cpu)
19704+{
19705+ local_irq_disable();
19706+ if (ksoftirqd_softirq_pending()) {
19707+ __do_softirq();
19708+ rcu_note_context_switch(cpu);
19709+ local_irq_enable();
19710+ cond_resched();
19711+ return;
19712+ }
19713+ local_irq_enable();
19714+}
19715+
19716 /*
19717 * preempt_count and SOFTIRQ_OFFSET usage:
19718 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
19719@@ -207,7 +352,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
19720
19721 asmlinkage void __do_softirq(void)
19722 {
19723- struct softirq_action *h;
19724 __u32 pending;
19725 int max_restart = MAX_SOFTIRQ_RESTART;
19726 int cpu;
19727@@ -224,7 +368,7 @@ asmlinkage void __do_softirq(void)
19728 vtime_account_irq_enter(current);
19729
19730 __local_bh_disable((unsigned long)__builtin_return_address(0),
19731- SOFTIRQ_OFFSET);
19732+ SOFTIRQ_OFFSET);
19733 lockdep_softirq_enter();
19734
19735 cpu = smp_processor_id();
19736@@ -232,36 +376,7 @@ restart:
19737 /* Reset the pending bitmask before enabling irqs */
19738 set_softirq_pending(0);
19739
19740- local_irq_enable();
19741-
19742- h = softirq_vec;
19743-
19744- do {
19745- if (pending & 1) {
19746- unsigned int vec_nr = h - softirq_vec;
19747- int prev_count = preempt_count();
19748-
19749- kstat_incr_softirqs_this_cpu(vec_nr);
19750-
19751- trace_softirq_entry(vec_nr);
19752- h->action(h);
19753- trace_softirq_exit(vec_nr);
19754- if (unlikely(prev_count != preempt_count())) {
19755- printk(KERN_ERR "huh, entered softirq %u %s %p"
19756- "with preempt_count %08x,"
19757- " exited with %08x?\n", vec_nr,
19758- softirq_to_name[vec_nr], h->action,
19759- prev_count, preempt_count());
19760- preempt_count() = prev_count;
19761- }
19762-
19763- rcu_bh_qs(cpu);
19764- }
19765- h++;
19766- pending >>= 1;
19767- } while (pending);
19768-
19769- local_irq_disable();
19770+ handle_pending_softirqs(pending, cpu, 1);
19771
19772 pending = local_softirq_pending();
19773 if (pending && --max_restart)
19774@@ -300,6 +415,259 @@ asmlinkage void do_softirq(void)
19775 #endif
19776
19777 /*
19778+ * This function must run with irqs disabled!
19779+ */
19780+void raise_softirq_irqoff(unsigned int nr)
19781+{
19782+ __raise_softirq_irqoff(nr);
19783+
19784+ /*
19785+ * If we're in an interrupt or softirq, we're done
19786+ * (this also catches softirq-disabled code). We will
19787+ * actually run the softirq once we return from
19788+ * the irq or softirq.
19789+ *
19790+ * Otherwise we wake up ksoftirqd to make sure we
19791+ * schedule the softirq soon.
19792+ */
19793+ if (!in_interrupt())
19794+ wakeup_softirqd();
19795+}
19796+
19797+void __raise_softirq_irqoff(unsigned int nr)
19798+{
19799+ trace_softirq_raise(nr);
19800+ or_softirq_pending(1UL << nr);
19801+}
19802+
19803+static inline void local_bh_disable_nort(void) { local_bh_disable(); }
19804+static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
19805+static void ksoftirqd_set_sched_params(unsigned int cpu) { }
19806+static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
19807+
19808+#else /* !PREEMPT_RT_FULL */
19809+
19810+/*
19811+ * On RT we serialize softirq execution with a cpu local lock per softirq
19812+ */
19813+static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
19814+
19815+void __init softirq_early_init(void)
19816+{
19817+ int i;
19818+
19819+ for (i = 0; i < NR_SOFTIRQS; i++)
19820+ local_irq_lock_init(local_softirq_locks[i]);
19821+}
19822+
19823+static void lock_softirq(int which)
19824+{
19825+ __local_lock(&__get_cpu_var(local_softirq_locks[which]));
19826+}
19827+
19828+static void unlock_softirq(int which)
19829+{
19830+ __local_unlock(&__get_cpu_var(local_softirq_locks[which]));
19831+}
19832+
19833+static void do_single_softirq(int which, int need_rcu_bh_qs)
19834+{
19835+ unsigned long old_flags = current->flags;
19836+
19837+ current->flags &= ~PF_MEMALLOC;
19838+ vtime_account(current);
19839+ current->flags |= PF_IN_SOFTIRQ;
19840+ lockdep_softirq_enter();
19841+ local_irq_enable();
19842+ handle_softirq(which, smp_processor_id(), need_rcu_bh_qs);
19843+ local_irq_disable();
19844+ lockdep_softirq_exit();
19845+ current->flags &= ~PF_IN_SOFTIRQ;
19846+ vtime_account(current);
19847+ tsk_restore_flags(current, old_flags, PF_MEMALLOC);
19848+}
19849+
19850+/*
19851+ * Called with interrupts disabled. Process softirqs which were raised
19852+ * in current context (or on behalf of ksoftirqd).
19853+ */
19854+static void do_current_softirqs(int need_rcu_bh_qs)
19855+{
19856+ while (current->softirqs_raised) {
19857+ int i = __ffs(current->softirqs_raised);
19858+ unsigned int pending, mask = (1U << i);
19859+
19860+ current->softirqs_raised &= ~mask;
19861+ local_irq_enable();
19862+
19863+ /*
19864+ * If the lock is contended, we boost the owner to
19865+ * process the softirq or leave the critical section
19866+ * now.
19867+ */
19868+ lock_softirq(i);
19869+ local_irq_disable();
19870+ softirq_set_runner(i);
19871+ /*
19872+ * Check the local_softirq_pending() bits to see
19873+ * whether we still need to process this or if someone
19874+ * else took care of it.
19875+ */
19876+ pending = local_softirq_pending();
19877+ if (pending & mask) {
19878+ set_softirq_pending(pending & ~mask);
19879+ do_single_softirq(i, need_rcu_bh_qs);
19880+ }
19881+ softirq_clr_runner(i);
19882+ unlock_softirq(i);
19883+ WARN_ON(current->softirq_nestcnt != 1);
19884+ }
19885+}
19886+
19887+void local_bh_disable(void)
19888+{
19889+ migrate_disable();
19890+ current->softirq_nestcnt++;
19891+}
19892+EXPORT_SYMBOL(local_bh_disable);
19893+
19894+void local_bh_enable(void)
19895+{
19896+ if (WARN_ON(current->softirq_nestcnt == 0))
19897+ return;
19898+
19899+ local_irq_disable();
19900+ if (current->softirq_nestcnt == 1 && current->softirqs_raised)
19901+ do_current_softirqs(1);
19902+ local_irq_enable();
19903+
19904+ current->softirq_nestcnt--;
19905+ migrate_enable();
19906+}
19907+EXPORT_SYMBOL(local_bh_enable);
19908+
19909+void local_bh_enable_ip(unsigned long ip)
19910+{
19911+ local_bh_enable();
19912+}
19913+EXPORT_SYMBOL(local_bh_enable_ip);
19914+
19915+void _local_bh_enable(void)
19916+{
19917+ current->softirq_nestcnt--;
19918+ migrate_enable();
19919+}
19920+EXPORT_SYMBOL(_local_bh_enable);
19921+
19922+int in_serving_softirq(void)
19923+{
19924+ return current->flags & PF_IN_SOFTIRQ;
19925+}
19926+EXPORT_SYMBOL(in_serving_softirq);
19927+
19928+/* Called with preemption disabled */
19929+static void run_ksoftirqd(unsigned int cpu)
19930+{
19931+ local_irq_disable();
19932+ current->softirq_nestcnt++;
19933+ do_current_softirqs(1);
19934+ current->softirq_nestcnt--;
19935+ rcu_note_context_switch(cpu);
19936+ local_irq_enable();
19937+}
19938+
19939+/*
19940+ * Called from netif_rx_ni(). Preemption enabled, but migration
19941+ * disabled. So the cpu can't go away under us.
19942+ */
19943+void thread_do_softirq(void)
19944+{
19945+ if (!in_serving_softirq() && current->softirqs_raised) {
19946+ current->softirq_nestcnt++;
19947+ do_current_softirqs(0);
19948+ current->softirq_nestcnt--;
19949+ }
19950+}
19951+
19952+static void do_raise_softirq_irqoff(unsigned int nr)
19953+{
19954+ trace_softirq_raise(nr);
19955+ or_softirq_pending(1UL << nr);
19956+
19957+ /*
19958+ * If we are not in a hard interrupt and inside a bh disabled
19959+ * region, we simply raise the flag on current. local_bh_enable()
19960+ * will make sure that the softirq is executed. Otherwise we
19961+ * delegate it to ksoftirqd.
19962+ */
19963+ if (!in_irq() && current->softirq_nestcnt)
19964+ current->softirqs_raised |= (1U << nr);
19965+ else if (__this_cpu_read(ksoftirqd))
19966+ __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
19967+}
19968+
19969+void __raise_softirq_irqoff(unsigned int nr)
19970+{
19971+ do_raise_softirq_irqoff(nr);
19972+ if (!in_irq() && !current->softirq_nestcnt)
19973+ wakeup_softirqd();
19974+}
19975+
19976+/*
19977+ * This function must run with irqs disabled!
19978+ */
19979+void raise_softirq_irqoff(unsigned int nr)
19980+{
19981+ do_raise_softirq_irqoff(nr);
19982+
19983+ /*
19984+ * If we're in a hard interrupt we let the irq return code deal
19985+ * with the wakeup of ksoftirqd.
19986+ */
19987+ if (in_irq())
19988+ return;
19989+
19990+ /*
19991+ * If we are in thread context but outside of a bh disabled
19992+ * region, we need to wake ksoftirqd as well.
19993+ *
19994+ * CHECKME: Some of the places which do that could be wrapped
19995+ * into local_bh_disable/enable pairs. Though it's unclear
19996+ * whether this is worth the effort. To find those places just
19997+ * raise a WARN() if the condition is met.
19998+ */
19999+ if (!current->softirq_nestcnt)
20000+ wakeup_softirqd();
20001+}
20002+
20003+static inline int ksoftirqd_softirq_pending(void)
20004+{
20005+ return current->softirqs_raised;
20006+}
20007+
20008+static inline void local_bh_disable_nort(void) { }
20009+static inline void _local_bh_enable_nort(void) { }
20010+
20011+static inline void ksoftirqd_set_sched_params(unsigned int cpu)
20012+{
20013+ struct sched_param param = { .sched_priority = 1 };
20014+
20015+ sched_setscheduler(current, SCHED_FIFO, &param);
20016+ /* Take over all pending softirqs when starting */
20017+ local_irq_disable();
20018+ current->softirqs_raised = local_softirq_pending();
20019+ local_irq_enable();
20020+}
20021+
20022+static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
20023+{
20024+ struct sched_param param = { .sched_priority = 0 };
20025+
20026+ sched_setscheduler(current, SCHED_NORMAL, &param);
20027+}
20028+
20029+#endif /* PREEMPT_RT_FULL */
20030+/*
20031 * Enter an interrupt context.
20032 */
20033 void irq_enter(void)
20034@@ -312,9 +680,9 @@ void irq_enter(void)
20035 * Prevent raise_softirq from needlessly waking up ksoftirqd
20036 * here, as softirq will be serviced on return from interrupt.
20037 */
20038- local_bh_disable();
20039+ local_bh_disable_nort();
20040 tick_check_idle(cpu);
20041- _local_bh_enable();
20042+ _local_bh_enable_nort();
20043 }
20044
20045 __irq_enter();
20046@@ -322,6 +690,7 @@ void irq_enter(void)
20047
20048 static inline void invoke_softirq(void)
20049 {
20050+#ifndef CONFIG_PREEMPT_RT_FULL
20051 if (!force_irqthreads) {
20052 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
20053 __do_softirq();
20054@@ -334,6 +703,15 @@ static inline void invoke_softirq(void)
20055 wakeup_softirqd();
20056 __local_bh_enable(SOFTIRQ_OFFSET);
20057 }
20058+#else /* PREEMPT_RT_FULL */
20059+ unsigned long flags;
20060+
20061+ local_irq_save(flags);
20062+ if (__this_cpu_read(ksoftirqd) &&
20063+ __this_cpu_read(ksoftirqd)->softirqs_raised)
20064+ wakeup_softirqd();
20065+ local_irq_restore(flags);
20066+#endif
20067 }
20068
20069 /*
20070@@ -356,26 +734,6 @@ void irq_exit(void)
20071 sched_preempt_enable_no_resched();
20072 }
20073
20074-/*
20075- * This function must run with irqs disabled!
20076- */
20077-inline void raise_softirq_irqoff(unsigned int nr)
20078-{
20079- __raise_softirq_irqoff(nr);
20080-
20081- /*
20082- * If we're in an interrupt or softirq, we're done
20083- * (this also catches softirq-disabled code). We will
20084- * actually run the softirq once we return from
20085- * the irq or softirq.
20086- *
20087- * Otherwise we wake up ksoftirqd to make sure we
20088- * schedule the softirq soon.
20089- */
20090- if (!in_interrupt())
20091- wakeup_softirqd();
20092-}
20093-
20094 void raise_softirq(unsigned int nr)
20095 {
20096 unsigned long flags;
20097@@ -385,12 +743,6 @@ void raise_softirq(unsigned int nr)
20098 local_irq_restore(flags);
20099 }
20100
20101-void __raise_softirq_irqoff(unsigned int nr)
20102-{
20103- trace_softirq_raise(nr);
20104- or_softirq_pending(1UL << nr);
20105-}
20106-
20107 void open_softirq(int nr, void (*action)(struct softirq_action *))
20108 {
20109 softirq_vec[nr].action = action;
20110@@ -408,15 +760,45 @@ struct tasklet_head
20111 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
20112 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
20113
20114+static void inline
20115+__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
20116+{
20117+ if (tasklet_trylock(t)) {
20118+again:
20119+ /* We may have been preempted before tasklet_trylock
20120+ * and __tasklet_action may have already run.
20121+ * and __tasklet_action may have already run.
20122+ * is locked before adding it to the list.
20123+ */
20124+ if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
20125+ t->next = NULL;
20126+ *head->tail = t;
20127+ head->tail = &(t->next);
20128+ raise_softirq_irqoff(nr);
20129+ tasklet_unlock(t);
20130+ } else {
20131+ /* This is subtle. If we hit the corner case above,
20132+ * it is possible that we get preempted right here,
20133+ * and another task has successfully called
20134+ * tasklet_schedule(), entered this function, and
20135+ * failed on the trylock. Thus we must be sure
20136+ * before releasing the tasklet lock, that the
20137+ * SCHED_BIT is clear. Otherwise the tasklet
20138+ * may get its SCHED_BIT set, but not added to the
20139+ * list
20140+ */
20141+ if (!tasklet_tryunlock(t))
20142+ goto again;
20143+ }
20144+ }
20145+}
20146+
20147 void __tasklet_schedule(struct tasklet_struct *t)
20148 {
20149 unsigned long flags;
20150
20151 local_irq_save(flags);
20152- t->next = NULL;
20153- *__this_cpu_read(tasklet_vec.tail) = t;
20154- __this_cpu_write(tasklet_vec.tail, &(t->next));
20155- raise_softirq_irqoff(TASKLET_SOFTIRQ);
20156+ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
20157 local_irq_restore(flags);
20158 }
20159
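
The queueing helper above, together with __tasklet_action() further below, depends on a small state machine built from the SCHED and RUN bits: a tasklet that gets scheduled while it is running must be re-queued or re-run rather than dropped, and the RUN bit may only be released through an exact RUN -> 0 transition. The following is a userspace sketch of just that bit protocol, with C11 atomics standing in for the kernel's test_and_set_bit()/cmpxchg helpers; it is illustrative only.

/* tasklet_state_model.c - the SCHED/RUN bit protocol used by the patch above.
 * Build: cc -std=c11 tasklet_state_model.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_SCHED (1u << 0)   /* queued, or needs to be queued again */
#define STATE_RUN   (1u << 1)   /* currently executing */

struct tasklet {
    atomic_uint state;
    void (*func)(void);
};

/* Claim the RUN bit; fails if the tasklet is already running elsewhere. */
static bool tasklet_trylock(struct tasklet *t)
{
    return !(atomic_fetch_or(&t->state, STATE_RUN) & STATE_RUN);
}

/* Release RUN, but only via the exact RUN -> 0 transition: if SCHED was set
 * again while we were running, the exchange fails and the caller must act. */
static bool tasklet_tryunlock(struct tasklet *t)
{
    unsigned int expected = STATE_RUN;

    return atomic_compare_exchange_strong(&t->state, &expected, 0);
}

static bool test_and_clear_sched(struct tasklet *t)
{
    return atomic_fetch_and(&t->state, ~STATE_SCHED) & STATE_SCHED;
}

/* Same loop shape as __tasklet_action(): run the body, then either unlock
 * cleanly or, if the tasklet was re-scheduled meanwhile, run it again. */
static void tasklet_run(struct tasklet *t)
{
    if (!tasklet_trylock(t))
        return;                          /* someone else is running it */
    if (!test_and_clear_sched(t))
        goto unlock;                     /* raced: nothing to do after all */
again:
    t->func();
unlock:
    while (!tasklet_tryunlock(t)) {
        if (test_and_clear_sched(t))
            goto again;                  /* re-scheduled while it ran */
    }
}

static void work(void) { puts("tasklet body"); }

int main(void)
{
    struct tasklet t;

    t.func = work;
    atomic_init(&t.state, STATE_SCHED);
    tasklet_run(&t);
    return 0;
}
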
20160@@ -427,10 +809,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
20161 unsigned long flags;
20162
20163 local_irq_save(flags);
20164- t->next = NULL;
20165- *__this_cpu_read(tasklet_hi_vec.tail) = t;
20166- __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
20167- raise_softirq_irqoff(HI_SOFTIRQ);
20168+ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
20169 local_irq_restore(flags);
20170 }
20171
20172@@ -438,50 +817,119 @@ EXPORT_SYMBOL(__tasklet_hi_schedule);
20173
20174 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
20175 {
20176- BUG_ON(!irqs_disabled());
20177-
20178- t->next = __this_cpu_read(tasklet_hi_vec.head);
20179- __this_cpu_write(tasklet_hi_vec.head, t);
20180- __raise_softirq_irqoff(HI_SOFTIRQ);
20181+ __tasklet_hi_schedule(t);
20182 }
20183
20184 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
20185
20186-static void tasklet_action(struct softirq_action *a)
20187+void tasklet_enable(struct tasklet_struct *t)
20188 {
20189- struct tasklet_struct *list;
20190+ if (!atomic_dec_and_test(&t->count))
20191+ return;
20192+ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
20193+ tasklet_schedule(t);
20194+}
20195
20196- local_irq_disable();
20197- list = __this_cpu_read(tasklet_vec.head);
20198- __this_cpu_write(tasklet_vec.head, NULL);
20199- __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
20200- local_irq_enable();
20201+EXPORT_SYMBOL(tasklet_enable);
20202+
20203+void tasklet_hi_enable(struct tasklet_struct *t)
20204+{
20205+ if (!atomic_dec_and_test(&t->count))
20206+ return;
20207+ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
20208+ tasklet_hi_schedule(t);
20209+}
20210+
20211+EXPORT_SYMBOL(tasklet_hi_enable);
20212+
20213+static void
20214+__tasklet_action(struct softirq_action *a, struct tasklet_struct *list)
20215+{
20216+ int loops = 1000000;
20217
20218 while (list) {
20219 struct tasklet_struct *t = list;
20220
20221 list = list->next;
20222
20223- if (tasklet_trylock(t)) {
20224- if (!atomic_read(&t->count)) {
20225- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
20226- BUG();
20227- t->func(t->data);
20228- tasklet_unlock(t);
20229- continue;
20230- }
20231- tasklet_unlock(t);
20232+ /*
20233+ * Should always succeed - after a tasklet got on the
20234+ * list (after getting the SCHED bit set from 0 to 1),
20235+ * nothing but the tasklet softirq it got queued to can
20236+ * lock it:
20237+ */
20238+ if (!tasklet_trylock(t)) {
20239+ WARN_ON(1);
20240+ continue;
20241 }
20242
20243- local_irq_disable();
20244 t->next = NULL;
20245- *__this_cpu_read(tasklet_vec.tail) = t;
20246- __this_cpu_write(tasklet_vec.tail, &(t->next));
20247- __raise_softirq_irqoff(TASKLET_SOFTIRQ);
20248- local_irq_enable();
20249+
20250+ /*
20251+ * If we cannot handle the tasklet because it's disabled,
20252+ * mark it as pending. tasklet_enable() will later
20253+ * re-schedule the tasklet.
20254+ */
20255+ if (unlikely(atomic_read(&t->count))) {
20256+out_disabled:
20257+ /* implicit unlock: */
20258+ wmb();
20259+ t->state = TASKLET_STATEF_PENDING;
20260+ continue;
20261+ }
20262+
20263+ /*
20264+ * From this point on the tasklet might be rescheduled
20265+ * on another CPU, but it can only be added to another
20266+ * CPU's tasklet list if we unlock the tasklet (which we
20267+ * don't do yet).
20268+ */
20269+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
20270+ WARN_ON(1);
20271+
20272+again:
20273+ t->func(t->data);
20274+
20275+ /*
20276+ * Try to unlock the tasklet. We must use cmpxchg, because
20277+ * another CPU might have scheduled or disabled the tasklet.
20278+ * We only allow the STATE_RUN -> 0 transition here.
20279+ */
20280+ while (!tasklet_tryunlock(t)) {
20281+ /*
20282+ * If it got disabled meanwhile, bail out:
20283+ */
20284+ if (atomic_read(&t->count))
20285+ goto out_disabled;
20286+ /*
20287+ * If it got scheduled meanwhile, re-execute
20288+ * the tasklet function:
20289+ */
20290+ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
20291+ goto again;
20292+ if (!--loops) {
20293+ printk("hm, tasklet state: %08lx\n", t->state);
20294+ WARN_ON(1);
20295+ tasklet_unlock(t);
20296+ break;
20297+ }
20298+ }
20299 }
20300 }
20301
20302+static void tasklet_action(struct softirq_action *a)
20303+{
20304+ struct tasklet_struct *list;
20305+
20306+ local_irq_disable();
20307+ list = __get_cpu_var(tasklet_vec).head;
20308+ __get_cpu_var(tasklet_vec).head = NULL;
20309+ __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
20310+ local_irq_enable();
20311+
20312+ __tasklet_action(a, list);
20313+}
20314+
20315 static void tasklet_hi_action(struct softirq_action *a)
20316 {
20317 struct tasklet_struct *list;
20318@@ -492,29 +940,7 @@ static void tasklet_hi_action(struct softirq_action *a)
20319 __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
20320 local_irq_enable();
20321
20322- while (list) {
20323- struct tasklet_struct *t = list;
20324-
20325- list = list->next;
20326-
20327- if (tasklet_trylock(t)) {
20328- if (!atomic_read(&t->count)) {
20329- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
20330- BUG();
20331- t->func(t->data);
20332- tasklet_unlock(t);
20333- continue;
20334- }
20335- tasklet_unlock(t);
20336- }
20337-
20338- local_irq_disable();
20339- t->next = NULL;
20340- *__this_cpu_read(tasklet_hi_vec.tail) = t;
20341- __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
20342- __raise_softirq_irqoff(HI_SOFTIRQ);
20343- local_irq_enable();
20344- }
20345+ __tasklet_action(a, list);
20346 }
20347
20348
20349@@ -537,7 +963,7 @@ void tasklet_kill(struct tasklet_struct *t)
20350
20351 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
20352 do {
20353- yield();
20354+ msleep(1);
20355 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
20356 }
20357 tasklet_unlock_wait(t);
20358@@ -743,22 +1169,26 @@ void __init softirq_init(void)
20359 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
20360 }
20361
20362-static int ksoftirqd_should_run(unsigned int cpu)
20363+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
20364+void tasklet_unlock_wait(struct tasklet_struct *t)
20365 {
20366- return local_softirq_pending();
20367+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
20368+ /*
20369+ * Hack for now to avoid this busy-loop:
20370+ */
20371+#ifdef CONFIG_PREEMPT_RT_FULL
20372+ msleep(1);
20373+#else
20374+ barrier();
20375+#endif
20376+ }
20377 }
20378+EXPORT_SYMBOL(tasklet_unlock_wait);
20379+#endif
20380
20381-static void run_ksoftirqd(unsigned int cpu)
20382+static int ksoftirqd_should_run(unsigned int cpu)
20383 {
20384- local_irq_disable();
20385- if (local_softirq_pending()) {
20386- __do_softirq();
20387- rcu_note_context_switch(cpu);
20388- local_irq_enable();
20389- cond_resched();
20390- return;
20391- }
20392- local_irq_enable();
20393+ return ksoftirqd_softirq_pending();
20394 }
20395
20396 #ifdef CONFIG_HOTPLUG_CPU
20397@@ -841,6 +1271,8 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
20398
20399 static struct smp_hotplug_thread softirq_threads = {
20400 .store = &ksoftirqd,
20401+ .setup = ksoftirqd_set_sched_params,
20402+ .cleanup = ksoftirqd_clr_sched_params,
20403 .thread_should_run = ksoftirqd_should_run,
20404 .thread_fn = run_ksoftirqd,
20405 .thread_comm = "ksoftirqd/%u",
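
The softirq.c rework above gives every softirq vector its own per-CPU lock on PREEMPT_RT, tracks pending work as a per-task softirqs_raised bitmask, and has do_current_softirqs() handle one raised vector at a time under that vector's lock, so unrelated softirq classes no longer serialize against each other when one of them blocks. A compact userspace sketch of that "one lock per vector, walk the raised bits" structure follows, with pthread mutexes standing in for the kernel's local locks; every name here is merely illustrative.

/* softirq_vectors.c - per-vector locks plus a raised-bits processing loop.
 * Build: cc -std=c11 -pthread softirq_vectors.c
 */
#include <pthread.h>
#include <stdio.h>
#include <strings.h>                    /* ffs() */

#define NR_SOFTIRQS 10
enum { HI_SOFTIRQ, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, TASKLET_SOFTIRQ = 6 };

static pthread_mutex_t vector_lock[NR_SOFTIRQS];        /* per-CPU local locks in the kernel */
static _Thread_local unsigned int softirqs_raised;      /* per-task pending mask */

static void softirq_early_init(void)
{
    for (int i = 0; i < NR_SOFTIRQS; i++)
        pthread_mutex_init(&vector_lock[i], NULL);
}

static void handle_softirq(int nr)
{
    printf("running softirq vector %d\n", nr);
}

static void raise_softirq(int nr)
{
    softirqs_raised |= 1U << nr;
}

/* Same shape as do_current_softirqs(): take the lowest raised bit, clear it,
 * and run that vector under its own lock, so blocking in one vector never
 * holds up an unrelated one. */
static void do_current_softirqs(void)
{
    while (softirqs_raised) {
        int i = ffs((int)softirqs_raised) - 1;

        softirqs_raised &= ~(1U << i);
        pthread_mutex_lock(&vector_lock[i]);
        handle_softirq(i);
        pthread_mutex_unlock(&vector_lock[i]);
    }
}

int main(void)
{
    softirq_early_init();
    raise_softirq(NET_RX_SOFTIRQ);
    raise_softirq(TASKLET_SOFTIRQ);
    do_current_softirqs();
    return 0;
}

In the real patch the raise path additionally decides between flagging current and delegating to ksoftirqd, depending on whether the caller sits inside a bh-disabled region or in hard interrupt context.
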
20406diff --git a/kernel/spinlock.c b/kernel/spinlock.c
20407index 5cdd806..da9775b 100644
20408--- a/kernel/spinlock.c
20409+++ b/kernel/spinlock.c
20410@@ -110,8 +110,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
20411 * __[spin|read|write]_lock_bh()
20412 */
20413 BUILD_LOCK_OPS(spin, raw_spinlock);
20414+
20415+#ifndef CONFIG_PREEMPT_RT_FULL
20416 BUILD_LOCK_OPS(read, rwlock);
20417 BUILD_LOCK_OPS(write, rwlock);
20418+#endif
20419
20420 #endif
20421
20422@@ -195,6 +198,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
20423 EXPORT_SYMBOL(_raw_spin_unlock_bh);
20424 #endif
20425
20426+#ifndef CONFIG_PREEMPT_RT_FULL
20427+
20428 #ifndef CONFIG_INLINE_READ_TRYLOCK
20429 int __lockfunc _raw_read_trylock(rwlock_t *lock)
20430 {
20431@@ -339,6 +344,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
20432 EXPORT_SYMBOL(_raw_write_unlock_bh);
20433 #endif
20434
20435+#endif /* !PREEMPT_RT_FULL */
20436+
20437 #ifdef CONFIG_DEBUG_LOCK_ALLOC
20438
20439 void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
20440diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
20441index 2f194e9..e98c70b 100644
20442--- a/kernel/stop_machine.c
20443+++ b/kernel/stop_machine.c
20444@@ -29,12 +29,12 @@ struct cpu_stop_done {
20445 atomic_t nr_todo; /* nr left to execute */
20446 bool executed; /* actually executed? */
20447 int ret; /* collected return value */
20448- struct completion completion; /* fired if nr_todo reaches 0 */
20449+ struct task_struct *waiter; /* woken when nr_todo reaches 0 */
20450 };
20451
20452 /* the actual stopper, one per every possible cpu, enabled on online cpus */
20453 struct cpu_stopper {
20454- spinlock_t lock;
20455+ raw_spinlock_t lock;
20456 bool enabled; /* is this stopper enabled? */
20457 struct list_head works; /* list of pending works */
20458 struct task_struct *thread; /* stopper thread */
20459@@ -47,7 +47,7 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
20460 {
20461 memset(done, 0, sizeof(*done));
20462 atomic_set(&done->nr_todo, nr_todo);
20463- init_completion(&done->completion);
20464+ done->waiter = current;
20465 }
20466
20467 /* signal completion unless @done is NULL */
20468@@ -56,8 +56,10 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
20469 if (done) {
20470 if (executed)
20471 done->executed = true;
20472- if (atomic_dec_and_test(&done->nr_todo))
20473- complete(&done->completion);
20474+ if (atomic_dec_and_test(&done->nr_todo)) {
20475+ wake_up_process(done->waiter);
20476+ done->waiter = NULL;
20477+ }
20478 }
20479 }
20480
20481@@ -67,7 +69,7 @@ static void cpu_stop_queue_work(struct cpu_stopper *stopper,
20482 {
20483 unsigned long flags;
20484
20485- spin_lock_irqsave(&stopper->lock, flags);
20486+ raw_spin_lock_irqsave(&stopper->lock, flags);
20487
20488 if (stopper->enabled) {
20489 list_add_tail(&work->list, &stopper->works);
20490@@ -75,7 +77,23 @@ static void cpu_stop_queue_work(struct cpu_stopper *stopper,
20491 } else
20492 cpu_stop_signal_done(work->done, false);
20493
20494- spin_unlock_irqrestore(&stopper->lock, flags);
20495+ raw_spin_unlock_irqrestore(&stopper->lock, flags);
20496+}
20497+
20498+static void wait_for_stop_done(struct cpu_stop_done *done)
20499+{
20500+ set_current_state(TASK_UNINTERRUPTIBLE);
20501+ while (atomic_read(&done->nr_todo)) {
20502+ schedule();
20503+ set_current_state(TASK_UNINTERRUPTIBLE);
20504+ }
20505+ /*
20506+ * We need to wait until cpu_stop_signal_done() has cleared
20507+ * done->waiter.
20508+ */
20509+ while (done->waiter)
20510+ cpu_relax();
20511+ set_current_state(TASK_RUNNING);
20512 }
20513
20514 /**
20515@@ -109,7 +127,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
20516
20517 cpu_stop_init_done(&done, 1);
20518 cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
20519- wait_for_completion(&done.completion);
20520+ wait_for_stop_done(&done);
20521 return done.executed ? done.ret : -ENOENT;
20522 }
20523
20524@@ -135,11 +153,12 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
20525
20526 /* static data for stop_cpus */
20527 static DEFINE_MUTEX(stop_cpus_mutex);
20528+static DEFINE_MUTEX(stopper_lock);
20529 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
20530
20531 static void queue_stop_cpus_work(const struct cpumask *cpumask,
20532 cpu_stop_fn_t fn, void *arg,
20533- struct cpu_stop_done *done)
20534+ struct cpu_stop_done *done, bool inactive)
20535 {
20536 struct cpu_stop_work *work;
20537 unsigned int cpu;
20538@@ -153,15 +172,19 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
20539 }
20540
20541 /*
20542- * Disable preemption while queueing to avoid getting
20543- * preempted by a stopper which might wait for other stoppers
20544- * to enter @fn which can lead to deadlock.
20545+ * Make sure that all work is queued on all cpus before we
20546+ * let any of the cpus execute it.
20547 */
20548- preempt_disable();
20549+ if (!inactive) {
20550+ mutex_lock(&stopper_lock);
20551+ } else {
20552+ while (!mutex_trylock(&stopper_lock))
20553+ cpu_relax();
20554+ }
20555 for_each_cpu(cpu, cpumask)
20556 cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
20557 &per_cpu(stop_cpus_work, cpu));
20558- preempt_enable();
20559+ mutex_unlock(&stopper_lock);
20560 }
20561
20562 static int __stop_cpus(const struct cpumask *cpumask,
20563@@ -170,8 +193,8 @@ static int __stop_cpus(const struct cpumask *cpumask,
20564 struct cpu_stop_done done;
20565
20566 cpu_stop_init_done(&done, cpumask_weight(cpumask));
20567- queue_stop_cpus_work(cpumask, fn, arg, &done);
20568- wait_for_completion(&done.completion);
20569+ queue_stop_cpus_work(cpumask, fn, arg, &done, false);
20570+ wait_for_stop_done(&done);
20571 return done.executed ? done.ret : -ENOENT;
20572 }
20573
20574@@ -259,13 +282,13 @@ repeat:
20575 }
20576
20577 work = NULL;
20578- spin_lock_irq(&stopper->lock);
20579+ raw_spin_lock_irq(&stopper->lock);
20580 if (!list_empty(&stopper->works)) {
20581 work = list_first_entry(&stopper->works,
20582 struct cpu_stop_work, list);
20583 list_del_init(&work->list);
20584 }
20585- spin_unlock_irq(&stopper->lock);
20586+ raw_spin_unlock_irq(&stopper->lock);
20587
20588 if (work) {
20589 cpu_stop_fn_t fn = work->fn;
20590@@ -275,6 +298,16 @@ repeat:
20591
20592 __set_current_state(TASK_RUNNING);
20593
20594+ /*
20595+ * Wait until the stopper has finished scheduling on all
20596+ * cpus.
20597+ */
20598+ mutex_lock(&stopper_lock);
20599+ /*
20600+ * Let other cpu threads continue as well
20601+ */
20602+ mutex_unlock(&stopper_lock);
20603+
20604 /* cpu stop callbacks are not allowed to sleep */
20605 preempt_disable();
20606
20607@@ -289,7 +322,13 @@ repeat:
20608 kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
20609 ksym_buf), arg);
20610
20611+ /*
20612+ * Make sure that the wakeup and setting done->waiter
20613+ * to NULL is atomic.
20614+ */
20615+ local_irq_disable();
20616 cpu_stop_signal_done(done, true);
20617+ local_irq_enable();
20618 } else
20619 schedule();
20620
20621@@ -317,6 +356,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
20622 if (IS_ERR(p))
20623 return notifier_from_errno(PTR_ERR(p));
20624 get_task_struct(p);
20625+ p->flags |= PF_STOMPER;
20626 kthread_bind(p, cpu);
20627 sched_set_stop_task(cpu, p);
20628 stopper->thread = p;
20629@@ -326,9 +366,9 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
20630 /* strictly unnecessary, as first user will wake it */
20631 wake_up_process(stopper->thread);
20632 /* mark enabled */
20633- spin_lock_irq(&stopper->lock);
20634+ raw_spin_lock_irq(&stopper->lock);
20635 stopper->enabled = true;
20636- spin_unlock_irq(&stopper->lock);
20637+ raw_spin_unlock_irq(&stopper->lock);
20638 break;
20639
20640 #ifdef CONFIG_HOTPLUG_CPU
20641@@ -341,11 +381,11 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
20642 /* kill the stopper */
20643 kthread_stop(stopper->thread);
20644 /* drain remaining works */
20645- spin_lock_irq(&stopper->lock);
20646+ raw_spin_lock_irq(&stopper->lock);
20647 list_for_each_entry(work, &stopper->works, list)
20648 cpu_stop_signal_done(work->done, false);
20649 stopper->enabled = false;
20650- spin_unlock_irq(&stopper->lock);
20651+ raw_spin_unlock_irq(&stopper->lock);
20652 /* release the stopper */
20653 put_task_struct(stopper->thread);
20654 stopper->thread = NULL;
20655@@ -376,7 +416,7 @@ static int __init cpu_stop_init(void)
20656 for_each_possible_cpu(cpu) {
20657 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
20658
20659- spin_lock_init(&stopper->lock);
20660+ raw_spin_lock_init(&stopper->lock);
20661 INIT_LIST_HEAD(&stopper->works);
20662 }
20663
20664@@ -566,11 +606,11 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
20665 set_state(&smdata, STOPMACHINE_PREPARE);
20666 cpu_stop_init_done(&done, num_active_cpus());
20667 queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
20668- &done);
20669+ &done, true);
20670 ret = stop_machine_cpu_stop(&smdata);
20671
20672 /* Busy wait for completion. */
20673- while (!completion_done(&done.completion))
20674+ while (atomic_read(&done.nr_todo))
20675 cpu_relax();
20676
20677 mutex_unlock(&stop_cpus_mutex);
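
The stop_machine.c changes above replace the struct completion, whose internal wait-queue lock cannot be taken from this context on RT, with a direct waiter handshake: an atomic nr_todo counts the stoppers still running, the last one wakes done->waiter, and the waiting side additionally spins until the field is cleared so the on-stack done structure cannot be reused while a stopper is still touching it. A userspace sketch of that counter-plus-handshake pattern, using C11 atomics and pthreads with purely illustrative names:

/* stop_done_model.c - wait for N workers without a completion object.
 * Build: cc -std=c11 -pthread stop_done_model.c
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

struct stop_done {
    atomic_int  nr_todo;        /* workers still to finish */
    atomic_bool waiter_busy;    /* a worker may still be touching this struct */
};

static struct stop_done done;

static void *stopper(void *arg)
{
    (void)arg;
    /* ... the per-cpu stop work would run here ... */
    if (atomic_fetch_sub(&done.nr_todo, 1) == 1) {
        /* Last stopper: the kernel wakes done->waiter here and then
         * clears the field; clearing the flag models both steps. */
        atomic_store(&done.waiter_busy, false);
    }
    return NULL;
}

static void wait_for_stop_done(struct stop_done *d)
{
    while (atomic_load(&d->nr_todo))
        sched_yield();                  /* kernel: schedule() in TASK_UNINTERRUPTIBLE */
    /* Do not let *d be reused until the last stopper has really
     * stopped touching it. */
    while (atomic_load(&d->waiter_busy))
        sched_yield();                  /* kernel: cpu_relax() */
}

int main(void)
{
    enum { STOPPERS = 4 };
    pthread_t tid[STOPPERS];

    atomic_store(&done.nr_todo, STOPPERS);
    atomic_store(&done.waiter_busy, true);
    for (int i = 0; i < STOPPERS; i++)
        pthread_create(&tid[i], NULL, stopper, NULL);
    wait_for_stop_done(&done);
    printf("all stoppers done\n");
    for (int i = 0; i < STOPPERS; i++)
        pthread_join(tid[i], NULL);
    return 0;
}
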
20678diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
20679index 7a925ba..ca9e113 100644
20680--- a/kernel/time/jiffies.c
20681+++ b/kernel/time/jiffies.c
20682@@ -67,7 +67,8 @@ static struct clocksource clocksource_jiffies = {
20683 .shift = JIFFIES_SHIFT,
20684 };
20685
20686-__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
20687+__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
20688+__cacheline_aligned_in_smp seqcount_t jiffies_seq;
20689
20690 #if (BITS_PER_LONG < 64)
20691 u64 get_jiffies_64(void)
20692@@ -76,9 +77,9 @@ u64 get_jiffies_64(void)
20693 u64 ret;
20694
20695 do {
20696- seq = read_seqbegin(&jiffies_lock);
20697+ seq = read_seqcount_begin(&jiffies_seq);
20698 ret = jiffies_64;
20699- } while (read_seqretry(&jiffies_lock, seq));
20700+ } while (read_seqcount_retry(&jiffies_seq, seq));
20701 return ret;
20702 }
20703 EXPORT_SYMBOL(get_jiffies_64);
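
Here and in the tick code below, the old jiffies_lock seqlock is split into a raw spinlock for the writer plus a separate seqcount, so readers keep their lockless retry loop while the writer side never sleeps on RT. A minimal userspace sketch of that writer-lock-plus-sequence-counter pattern follows; C11 atomics stand in for the kernel's seqcount primitives and nothing here is the kernel implementation.

/* jiffies_seq_model.c - writer lock plus sequence counter, as in the patch above.
 * Build: cc -std=c11 -pthread jiffies_seq_model.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t jiffies_lock = PTHREAD_MUTEX_INITIALIZER;  /* writers only */
static atomic_uint jiffies_seq;                                   /* reader retry counter */
static atomic_uint jiffies_hi, jiffies_lo;      /* the pair the seqcount keeps consistent */

/* Writer: take the lock, make the sequence odd, update, make it even again. */
static void do_timer(void)
{
    pthread_mutex_lock(&jiffies_lock);
    atomic_fetch_add(&jiffies_seq, 1);
    unsigned int lo = atomic_load(&jiffies_lo) + 1;
    atomic_store(&jiffies_lo, lo);
    if (lo == 0)
        atomic_fetch_add(&jiffies_hi, 1);
    atomic_fetch_add(&jiffies_seq, 1);
    pthread_mutex_unlock(&jiffies_lock);
}

/* Reader: lockless; retry whenever a writer was active during the read. */
static uint64_t get_jiffies_64(void)
{
    unsigned int seq, hi, lo;

    do {
        do {
            seq = atomic_load(&jiffies_seq);
        } while (seq & 1);              /* write in progress, spin */
        hi = atomic_load(&jiffies_hi);
        lo = atomic_load(&jiffies_lo);
    } while (atomic_load(&jiffies_seq) != seq);

    return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
    for (int i = 0; i < 1000; i++)
        do_timer();
    printf("jiffies = %llu\n", (unsigned long long)get_jiffies_64());
    return 0;
}

The same split is applied to tick-common.c and tick-sched.c below, and later in the patch to the timekeeper itself.
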
20704diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
20705index 24174b4..bb1edfa 100644
20706--- a/kernel/time/ntp.c
20707+++ b/kernel/time/ntp.c
20708@@ -22,7 +22,7 @@
20709 * NTP timekeeping variables:
20710 */
20711
20712-DEFINE_SPINLOCK(ntp_lock);
20713+DEFINE_RAW_SPINLOCK(ntp_lock);
20714
20715
20716 /* USER_HZ period (usecs): */
20717@@ -347,7 +347,7 @@ void ntp_clear(void)
20718 {
20719 unsigned long flags;
20720
20721- spin_lock_irqsave(&ntp_lock, flags);
20722+ raw_spin_lock_irqsave(&ntp_lock, flags);
20723
20724 time_adjust = 0; /* stop active adjtime() */
20725 time_status |= STA_UNSYNC;
20726@@ -361,7 +361,7 @@ void ntp_clear(void)
20727
20728 /* Clear PPS state variables */
20729 pps_clear();
20730- spin_unlock_irqrestore(&ntp_lock, flags);
20731+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20732
20733 }
20734
20735@@ -371,9 +371,9 @@ u64 ntp_tick_length(void)
20736 unsigned long flags;
20737 s64 ret;
20738
20739- spin_lock_irqsave(&ntp_lock, flags);
20740+ raw_spin_lock_irqsave(&ntp_lock, flags);
20741 ret = tick_length;
20742- spin_unlock_irqrestore(&ntp_lock, flags);
20743+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20744 return ret;
20745 }
20746
20747@@ -394,7 +394,7 @@ int second_overflow(unsigned long secs)
20748 int leap = 0;
20749 unsigned long flags;
20750
20751- spin_lock_irqsave(&ntp_lock, flags);
20752+ raw_spin_lock_irqsave(&ntp_lock, flags);
20753
20754 /*
20755 * Leap second processing. If in leap-insert state at the end of the
20756@@ -478,7 +478,7 @@ int second_overflow(unsigned long secs)
20757 time_adjust = 0;
20758
20759 out:
20760- spin_unlock_irqrestore(&ntp_lock, flags);
20761+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20762
20763 return leap;
20764 }
20765@@ -660,7 +660,7 @@ int do_adjtimex(struct timex *txc)
20766
20767 getnstimeofday(&ts);
20768
20769- spin_lock_irq(&ntp_lock);
20770+ raw_spin_lock_irq(&ntp_lock);
20771
20772 if (txc->modes & ADJ_ADJTIME) {
20773 long save_adjust = time_adjust;
20774@@ -702,7 +702,7 @@ int do_adjtimex(struct timex *txc)
20775 /* fill PPS status fields */
20776 pps_fill_timex(txc);
20777
20778- spin_unlock_irq(&ntp_lock);
20779+ raw_spin_unlock_irq(&ntp_lock);
20780
20781 txc->time.tv_sec = ts.tv_sec;
20782 txc->time.tv_usec = ts.tv_nsec;
20783@@ -900,7 +900,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
20784
20785 pts_norm = pps_normalize_ts(*phase_ts);
20786
20787- spin_lock_irqsave(&ntp_lock, flags);
20788+ raw_spin_lock_irqsave(&ntp_lock, flags);
20789
20790 /* clear the error bits, they will be set again if needed */
20791 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
20792@@ -913,7 +913,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
20793 * just start the frequency interval */
20794 if (unlikely(pps_fbase.tv_sec == 0)) {
20795 pps_fbase = *raw_ts;
20796- spin_unlock_irqrestore(&ntp_lock, flags);
20797+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20798 return;
20799 }
20800
20801@@ -928,7 +928,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
20802 time_status |= STA_PPSJITTER;
20803 /* restart the frequency calibration interval */
20804 pps_fbase = *raw_ts;
20805- spin_unlock_irqrestore(&ntp_lock, flags);
20806+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20807 pr_err("hardpps: PPSJITTER: bad pulse\n");
20808 return;
20809 }
20810@@ -945,7 +945,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
20811
20812 hardpps_update_phase(pts_norm.nsec);
20813
20814- spin_unlock_irqrestore(&ntp_lock, flags);
20815+ raw_spin_unlock_irqrestore(&ntp_lock, flags);
20816 }
20817 EXPORT_SYMBOL(hardpps);
20818
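
The ntp.c conversion above, like the jiffies and stop_machine changes, switches to raw spinlocks, which keep their busy-waiting, never-sleeping semantics on PREEMPT_RT instead of turning into rtmutexes, because these paths run with interrupts disabled or from hard interrupt context. Purely as an illustration of that "always spin, never sleep" behaviour, here is a toy userspace spinlock built on C11 atomic_flag; it is not the kernel implementation.

/* raw_spinlock_model.c - a spinning lock that never sleeps.
 * Build: cc -std=c11 raw_spinlock_model.c
 */
#include <stdatomic.h>
#include <stdio.h>

typedef struct { atomic_flag locked; } raw_spinlock_t;

static void raw_spin_lock(raw_spinlock_t *l)
{
    while (atomic_flag_test_and_set_explicit(&l->locked, memory_order_acquire))
        ;   /* busy-wait: the owner keeps running, so the wait stays short */
}

static void raw_spin_unlock(raw_spinlock_t *l)
{
    atomic_flag_clear_explicit(&l->locked, memory_order_release);
}

static raw_spinlock_t ntp_lock = { ATOMIC_FLAG_INIT };
static long tick_length;

int main(void)
{
    raw_spin_lock(&ntp_lock);
    tick_length = 10000000;     /* update the protected state under the lock */
    raw_spin_unlock(&ntp_lock);
    printf("tick_length = %ld\n", tick_length);
    return 0;
}
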
20819diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
20820index b1600a6..e8076c6 100644
20821--- a/kernel/time/tick-common.c
20822+++ b/kernel/time/tick-common.c
20823@@ -63,13 +63,15 @@ int tick_is_oneshot_available(void)
20824 static void tick_periodic(int cpu)
20825 {
20826 if (tick_do_timer_cpu == cpu) {
20827- write_seqlock(&jiffies_lock);
20828+ raw_spin_lock(&jiffies_lock);
20829+ write_seqcount_begin(&jiffies_seq);
20830
20831 /* Keep track of the next tick event */
20832 tick_next_period = ktime_add(tick_next_period, tick_period);
20833
20834 do_timer(1);
20835- write_sequnlock(&jiffies_lock);
20836+ write_seqcount_end(&jiffies_seq);
20837+ raw_spin_unlock(&jiffies_lock);
20838 }
20839
20840 update_process_times(user_mode(get_irq_regs()));
20841@@ -130,9 +132,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
20842 ktime_t next;
20843
20844 do {
20845- seq = read_seqbegin(&jiffies_lock);
20846+ seq = read_seqcount_begin(&jiffies_seq);
20847 next = tick_next_period;
20848- } while (read_seqretry(&jiffies_lock, seq));
20849+ } while (read_seqcount_retry(&jiffies_seq, seq));
20850
20851 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
20852
20853diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
20854index cf3e59e..ad8edee 100644
20855--- a/kernel/time/tick-internal.h
20856+++ b/kernel/time/tick-internal.h
20857@@ -4,6 +4,9 @@
20858 #include <linux/hrtimer.h>
20859 #include <linux/tick.h>
20860
20861+extern raw_spinlock_t jiffies_lock;
20862+extern seqcount_t jiffies_seq;
20863+
20864 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
20865
20866 #define TICK_DO_TIMER_NONE -1
20867diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
20868index e78feff..626b320f 100644
20869--- a/kernel/time/tick-sched.c
20870+++ b/kernel/time/tick-sched.c
20871@@ -56,7 +56,8 @@ static void tick_do_update_jiffies64(ktime_t now)
20872 return;
20873
20874 /* Reevalute with jiffies_lock held */
20875- write_seqlock(&jiffies_lock);
20876+ raw_spin_lock(&jiffies_lock);
20877+ write_seqcount_begin(&jiffies_seq);
20878
20879 delta = ktime_sub(now, last_jiffies_update);
20880 if (delta.tv64 >= tick_period.tv64) {
20881@@ -79,7 +80,8 @@ static void tick_do_update_jiffies64(ktime_t now)
20882 /* Keep the tick_next_period variable up to date */
20883 tick_next_period = ktime_add(last_jiffies_update, tick_period);
20884 }
20885- write_sequnlock(&jiffies_lock);
20886+ write_seqcount_end(&jiffies_seq);
20887+ raw_spin_unlock(&jiffies_lock);
20888 }
20889
20890 /*
20891@@ -89,12 +91,14 @@ static ktime_t tick_init_jiffy_update(void)
20892 {
20893 ktime_t period;
20894
20895- write_seqlock(&jiffies_lock);
20896+ raw_spin_lock(&jiffies_lock);
20897+ write_seqcount_begin(&jiffies_seq);
20898 /* Did we start the jiffies update yet ? */
20899 if (last_jiffies_update.tv64 == 0)
20900 last_jiffies_update = tick_next_period;
20901 period = last_jiffies_update;
20902- write_sequnlock(&jiffies_lock);
20903+ write_seqcount_end(&jiffies_seq);
20904+ raw_spin_unlock(&jiffies_lock);
20905 return period;
20906 }
20907
20908@@ -325,11 +329,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
20909
20910 /* Read jiffies and the time when jiffies were updated last */
20911 do {
20912- seq = read_seqbegin(&jiffies_lock);
20913+ seq = read_seqcount_begin(&jiffies_seq);
20914 last_update = last_jiffies_update;
20915 last_jiffies = jiffies;
20916 time_delta = timekeeping_max_deferment();
20917- } while (read_seqretry(&jiffies_lock, seq));
20918+ } while (read_seqcount_retry(&jiffies_seq, seq));
20919
20920 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
20921 arch_needs_cpu(cpu)) {
20922@@ -477,14 +481,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
20923 return false;
20924
20925 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
20926- static int ratelimit;
20927-
20928- if (ratelimit < 10 &&
20929- (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
20930- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
20931- (unsigned int) local_softirq_pending());
20932- ratelimit++;
20933- }
20934+ softirq_check_pending_idle();
20935 return false;
20936 }
20937
20938@@ -863,6 +860,7 @@ void tick_setup_sched_timer(void)
20939 * Emulate tick processing via per-CPU hrtimers:
20940 */
20941 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
20942+ ts->sched_timer.irqsafe = 1;
20943 ts->sched_timer.function = tick_sched_timer;
20944
20945 /* Get the next period (per cpu) */
20946diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
20947index cbc6acb..ea93e56 100644
20948--- a/kernel/time/timekeeping.c
20949+++ b/kernel/time/timekeeping.c
20950@@ -23,8 +23,12 @@
20951 #include <linux/stop_machine.h>
20952 #include <linux/pvclock_gtod.h>
20953
20954+#include "tick-internal.h"
20955
20956 static struct timekeeper timekeeper;
20957+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
20958+static seqcount_t timekeeper_seq;
20959+static struct timekeeper shadow_timekeeper;
20960
20961 /* flag for if timekeeping is suspended */
20962 int __read_mostly timekeeping_suspended;
20963@@ -93,7 +97,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
20964
20965 old_clock = tk->clock;
20966 tk->clock = clock;
20967- clock->cycle_last = clock->read(clock);
20968+ tk->cycle_last = clock->cycle_last = clock->read(clock);
20969
20970 /* Do the ns -> cycle conversion first, using original mult */
20971 tmp = NTP_INTERVAL_LENGTH;
20972@@ -184,8 +188,6 @@ static void update_pvclock_gtod(struct timekeeper *tk)
20973
20974 /**
20975 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
20976- *
20977- * Must hold write on timekeeper.lock
20978 */
20979 int pvclock_gtod_register_notifier(struct notifier_block *nb)
20980 {
20981@@ -193,11 +195,10 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
20982 unsigned long flags;
20983 int ret;
20984
20985- write_seqlock_irqsave(&tk->lock, flags);
20986+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
20987 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
20988- /* update timekeeping data */
20989 update_pvclock_gtod(tk);
20990- write_sequnlock_irqrestore(&tk->lock, flags);
20991+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
20992
20993 return ret;
20994 }
20995@@ -206,25 +207,22 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
20996 /**
20997 * pvclock_gtod_unregister_notifier - unregister a pvclock
20998 * timedata update listener
20999- *
21000- * Must hold write on timekeeper.lock
21001 */
21002 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
21003 {
21004- struct timekeeper *tk = &timekeeper;
21005 unsigned long flags;
21006 int ret;
21007
21008- write_seqlock_irqsave(&tk->lock, flags);
21009+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21010 ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
21011- write_sequnlock_irqrestore(&tk->lock, flags);
21012+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21013
21014 return ret;
21015 }
21016 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
21017
21018-/* must hold write on timekeeper.lock */
21019-static void timekeeping_update(struct timekeeper *tk, bool clearntp)
21020+/* must hold timekeeper_lock */
21021+static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
21022 {
21023 if (clearntp) {
21024 tk->ntp_error = 0;
21025@@ -232,6 +230,9 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
21026 }
21027 update_vsyscall(tk);
21028 update_pvclock_gtod(tk);
21029+
21030+ if (mirror)
21031+ memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
21032 }
21033
21034 /**
21035@@ -250,7 +251,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
21036 clock = tk->clock;
21037 cycle_now = clock->read(clock);
21038 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
21039- clock->cycle_last = cycle_now;
21040+ tk->cycle_last = clock->cycle_last = cycle_now;
21041
21042 tk->xtime_nsec += cycle_delta * tk->mult;
21043
21044@@ -278,12 +279,12 @@ void getnstimeofday(struct timespec *ts)
21045 WARN_ON(timekeeping_suspended);
21046
21047 do {
21048- seq = read_seqbegin(&tk->lock);
21049+ seq = read_seqcount_begin(&timekeeper_seq);
21050
21051 ts->tv_sec = tk->xtime_sec;
21052 nsecs = timekeeping_get_ns(tk);
21053
21054- } while (read_seqretry(&tk->lock, seq));
21055+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21056
21057 ts->tv_nsec = 0;
21058 timespec_add_ns(ts, nsecs);
21059@@ -299,11 +300,11 @@ ktime_t ktime_get(void)
21060 WARN_ON(timekeeping_suspended);
21061
21062 do {
21063- seq = read_seqbegin(&tk->lock);
21064+ seq = read_seqcount_begin(&timekeeper_seq);
21065 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
21066 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
21067
21068- } while (read_seqretry(&tk->lock, seq));
21069+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21070 /*
21071 * Use ktime_set/ktime_add_ns to create a proper ktime on
21072 * 32-bit architectures without CONFIG_KTIME_SCALAR.
21073@@ -330,12 +331,12 @@ void ktime_get_ts(struct timespec *ts)
21074 WARN_ON(timekeeping_suspended);
21075
21076 do {
21077- seq = read_seqbegin(&tk->lock);
21078+ seq = read_seqcount_begin(&timekeeper_seq);
21079 ts->tv_sec = tk->xtime_sec;
21080 nsec = timekeeping_get_ns(tk);
21081 tomono = tk->wall_to_monotonic;
21082
21083- } while (read_seqretry(&tk->lock, seq));
21084+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21085
21086 ts->tv_sec += tomono.tv_sec;
21087 ts->tv_nsec = 0;
21088@@ -363,7 +364,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
21089 WARN_ON_ONCE(timekeeping_suspended);
21090
21091 do {
21092- seq = read_seqbegin(&tk->lock);
21093+ seq = read_seqcount_begin(&timekeeper_seq);
21094
21095 *ts_raw = tk->raw_time;
21096 ts_real->tv_sec = tk->xtime_sec;
21097@@ -372,7 +373,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
21098 nsecs_raw = timekeeping_get_ns_raw(tk);
21099 nsecs_real = timekeeping_get_ns(tk);
21100
21101- } while (read_seqretry(&tk->lock, seq));
21102+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21103
21104 timespec_add_ns(ts_raw, nsecs_raw);
21105 timespec_add_ns(ts_real, nsecs_real);
21106@@ -412,7 +413,8 @@ int do_settimeofday(const struct timespec *tv)
21107 if (!timespec_valid_strict(tv))
21108 return -EINVAL;
21109
21110- write_seqlock_irqsave(&tk->lock, flags);
21111+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21112+ write_seqcount_begin(&timekeeper_seq);
21113
21114 timekeeping_forward_now(tk);
21115
21116@@ -424,9 +426,10 @@ int do_settimeofday(const struct timespec *tv)
21117
21118 tk_set_xtime(tk, tv);
21119
21120- timekeeping_update(tk, true);
21121+ timekeeping_update(tk, true, true);
21122
21123- write_sequnlock_irqrestore(&tk->lock, flags);
21124+ write_seqcount_end(&timekeeper_seq);
21125+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21126
21127 /* signal hrtimers about time change */
21128 clock_was_set();
21129@@ -451,7 +454,8 @@ int timekeeping_inject_offset(struct timespec *ts)
21130 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
21131 return -EINVAL;
21132
21133- write_seqlock_irqsave(&tk->lock, flags);
21134+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21135+ write_seqcount_begin(&timekeeper_seq);
21136
21137 timekeeping_forward_now(tk);
21138
21139@@ -466,9 +470,10 @@ int timekeeping_inject_offset(struct timespec *ts)
21140 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
21141
21142 error: /* even if we error out, we forwarded the time, so call update */
21143- timekeeping_update(tk, true);
21144+ timekeeping_update(tk, true, true);
21145
21146- write_sequnlock_irqrestore(&tk->lock, flags);
21147+ write_seqcount_end(&timekeeper_seq);
21148+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21149
21150 /* signal hrtimers about time change */
21151 clock_was_set();
21152@@ -490,7 +495,8 @@ static int change_clocksource(void *data)
21153
21154 new = (struct clocksource *) data;
21155
21156- write_seqlock_irqsave(&tk->lock, flags);
21157+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21158+ write_seqcount_begin(&timekeeper_seq);
21159
21160 timekeeping_forward_now(tk);
21161 if (!new->enable || new->enable(new) == 0) {
21162@@ -499,9 +505,10 @@ static int change_clocksource(void *data)
21163 if (old->disable)
21164 old->disable(old);
21165 }
21166- timekeeping_update(tk, true);
21167+ timekeeping_update(tk, true, true);
21168
21169- write_sequnlock_irqrestore(&tk->lock, flags);
21170+ write_seqcount_end(&timekeeper_seq);
21171+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21172
21173 return 0;
21174 }
21175@@ -551,11 +558,11 @@ void getrawmonotonic(struct timespec *ts)
21176 s64 nsecs;
21177
21178 do {
21179- seq = read_seqbegin(&tk->lock);
21180+ seq = read_seqcount_begin(&timekeeper_seq);
21181 nsecs = timekeeping_get_ns_raw(tk);
21182 *ts = tk->raw_time;
21183
21184- } while (read_seqretry(&tk->lock, seq));
21185+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21186
21187 timespec_add_ns(ts, nsecs);
21188 }
21189@@ -571,11 +578,11 @@ int timekeeping_valid_for_hres(void)
21190 int ret;
21191
21192 do {
21193- seq = read_seqbegin(&tk->lock);
21194+ seq = read_seqcount_begin(&timekeeper_seq);
21195
21196 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
21197
21198- } while (read_seqretry(&tk->lock, seq));
21199+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21200
21201 return ret;
21202 }
21203@@ -590,11 +597,11 @@ u64 timekeeping_max_deferment(void)
21204 u64 ret;
21205
21206 do {
21207- seq = read_seqbegin(&tk->lock);
21208+ seq = read_seqcount_begin(&timekeeper_seq);
21209
21210 ret = tk->clock->max_idle_ns;
21211
21212- } while (read_seqretry(&tk->lock, seq));
21213+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21214
21215 return ret;
21216 }
21217@@ -655,11 +662,10 @@ void __init timekeeping_init(void)
21218 boot.tv_nsec = 0;
21219 }
21220
21221- seqlock_init(&tk->lock);
21222-
21223 ntp_init();
21224
21225- write_seqlock_irqsave(&tk->lock, flags);
21226+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21227+ write_seqcount_begin(&timekeeper_seq);
21228 clock = clocksource_default_clock();
21229 if (clock->enable)
21230 clock->enable(clock);
21231@@ -678,7 +684,10 @@ void __init timekeeping_init(void)
21232 tmp.tv_nsec = 0;
21233 tk_set_sleep_time(tk, tmp);
21234
21235- write_sequnlock_irqrestore(&tk->lock, flags);
21236+ memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
21237+
21238+ write_seqcount_end(&timekeeper_seq);
21239+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21240 }
21241
21242 /* time in seconds when suspend began */
21243@@ -725,15 +734,17 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
21244 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
21245 return;
21246
21247- write_seqlock_irqsave(&tk->lock, flags);
21248+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21249+ write_seqcount_begin(&timekeeper_seq);
21250
21251 timekeeping_forward_now(tk);
21252
21253 __timekeeping_inject_sleeptime(tk, delta);
21254
21255- timekeeping_update(tk, true);
21256+ timekeeping_update(tk, true, true);
21257
21258- write_sequnlock_irqrestore(&tk->lock, flags);
21259+ write_seqcount_end(&timekeeper_seq);
21260+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21261
21262 /* signal hrtimers about time change */
21263 clock_was_set();
21264@@ -757,18 +768,20 @@ static void timekeeping_resume(void)
21265 clockevents_resume();
21266 clocksource_resume();
21267
21268- write_seqlock_irqsave(&tk->lock, flags);
21269+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21270+ write_seqcount_begin(&timekeeper_seq);
21271
21272 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
21273 ts = timespec_sub(ts, timekeeping_suspend_time);
21274 __timekeeping_inject_sleeptime(tk, &ts);
21275 }
21276 /* re-base the last cycle value */
21277- tk->clock->cycle_last = tk->clock->read(tk->clock);
21278+ tk->cycle_last = tk->clock->cycle_last = tk->clock->read(tk->clock);
21279 tk->ntp_error = 0;
21280 timekeeping_suspended = 0;
21281- timekeeping_update(tk, false);
21282- write_sequnlock_irqrestore(&tk->lock, flags);
21283+ timekeeping_update(tk, false, true);
21284+ write_seqcount_end(&timekeeper_seq);
21285+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21286
21287 touch_softlockup_watchdog();
21288
21289@@ -787,7 +800,8 @@ static int timekeeping_suspend(void)
21290
21291 read_persistent_clock(&timekeeping_suspend_time);
21292
21293- write_seqlock_irqsave(&tk->lock, flags);
21294+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21295+ write_seqcount_begin(&timekeeper_seq);
21296 timekeeping_forward_now(tk);
21297 timekeeping_suspended = 1;
21298
21299@@ -810,7 +824,8 @@ static int timekeeping_suspend(void)
21300 timekeeping_suspend_time =
21301 timespec_add(timekeeping_suspend_time, delta_delta);
21302 }
21303- write_sequnlock_irqrestore(&tk->lock, flags);
21304+ write_seqcount_end(&timekeeper_seq);
21305+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21306
21307 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
21308 clocksource_suspend();
21309@@ -1077,15 +1092,16 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
21310 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
21311 u32 shift)
21312 {
21313+ cycle_t interval = tk->cycle_interval << shift;
21314 u64 raw_nsecs;
21315
21316 	/* If the offset is smaller than a shifted interval, do nothing */
21317- if (offset < tk->cycle_interval<<shift)
21318+ if (offset < interval)
21319 return offset;
21320
21321 /* Accumulate one shifted interval */
21322- offset -= tk->cycle_interval << shift;
21323- tk->clock->cycle_last += tk->cycle_interval << shift;
21324+ offset -= interval;
21325+ tk->cycle_last += interval;
21326
21327 tk->xtime_nsec += tk->xtime_interval << shift;
21328 accumulate_nsecs_to_secs(tk);
21329@@ -1142,27 +1158,28 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
21330 static void update_wall_time(void)
21331 {
21332 struct clocksource *clock;
21333- struct timekeeper *tk = &timekeeper;
21334+ struct timekeeper *real_tk = &timekeeper;
21335+ struct timekeeper *tk = &shadow_timekeeper;
21336 cycle_t offset;
21337 int shift = 0, maxshift;
21338 unsigned long flags;
21339
21340- write_seqlock_irqsave(&tk->lock, flags);
21341+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
21342
21343 /* Make sure we're fully resumed: */
21344 if (unlikely(timekeeping_suspended))
21345 goto out;
21346
21347- clock = tk->clock;
21348+ clock = real_tk->clock;
21349
21350 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
21351- offset = tk->cycle_interval;
21352+ offset = real_tk->cycle_interval;
21353 #else
21354 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
21355 #endif
21356
21357 /* Check if there's really nothing to do */
21358- if (offset < tk->cycle_interval)
21359+ if (offset < real_tk->cycle_interval)
21360 goto out;
21361
21362 /*
21363@@ -1199,11 +1216,24 @@ static void update_wall_time(void)
21364 */
21365 accumulate_nsecs_to_secs(tk);
21366
21367- timekeeping_update(tk, false);
21368-
21369+ write_seqcount_begin(&timekeeper_seq);
21370+ /* Update clock->cycle_last with the new value */
21371+ clock->cycle_last = tk->cycle_last;
21372+ /*
21373+ * Update the real timekeeper.
21374+ *
21375+ * We could avoid this memcpy by switching pointers, but that
21376+ * requires changes to all other timekeeper usage sites as
21377+ * well, i.e. move the timekeeper pointer getter into the
21378+ * spinlocked/seqcount protected sections. And we trade this
21379+ * memcpy under the timekeeper_seq against one before we start
21380+ * updating.
21381+ */
21382+ memcpy(real_tk, tk, sizeof(*tk));
21383+ timekeeping_update(real_tk, false, false);
21384+ write_seqcount_end(&timekeeper_seq);
21385 out:
21386- write_sequnlock_irqrestore(&tk->lock, flags);
21387-
21388+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
21389 }
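
The hunk above replaces per-timekeeper seqlock writers with a global raw spinlock plus a seqcount, and accumulates into a shadow copy that is published with a single memcpy inside the write seqcount. A minimal userspace sketch of that double-buffering idea, using pthreads and C11 atomics in place of the kernel primitives (memory ordering simplified); every name here is illustrative, not kernel API:

/*
 * Sketch of the shadow-timekeeper publish done in update_wall_time():
 * writers accumulate off to the side, then publish under a seqcount.
 * Build (as part of a test program): cc -std=c11 -pthread -c shadow_tk.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <string.h>

struct tk_state { long long xtime_sec; long xtime_nsec; };

static struct tk_state tk_live;                  /* what readers see        */
static struct tk_state tk_shadow;                /* writer-private scratch  */
static pthread_mutex_t tk_lock = PTHREAD_MUTEX_INITIALIZER; /* ~timekeeper_lock */
static atomic_uint tk_seq;                       /* ~timekeeper_seq         */

static void tk_advance(long nsec)
{
	pthread_mutex_lock(&tk_lock);            /* serialize writers       */

	tk_shadow.xtime_nsec += nsec;            /* readers still see the   */
	while (tk_shadow.xtime_nsec >= 1000000000L) { /* old tk_live here   */
		tk_shadow.xtime_nsec -= 1000000000L;
		tk_shadow.xtime_sec++;
	}

	/* Publish: one memcpy inside the write seqcount, as in the patch. */
	atomic_fetch_add_explicit(&tk_seq, 1, memory_order_acq_rel); /* odd  */
	memcpy(&tk_live, &tk_shadow, sizeof(tk_live));
	atomic_fetch_add_explicit(&tk_seq, 1, memory_order_acq_rel); /* even */

	pthread_mutex_unlock(&tk_lock);
}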
21390
21391 /**
21392@@ -1250,13 +1280,13 @@ void get_monotonic_boottime(struct timespec *ts)
21393 WARN_ON(timekeeping_suspended);
21394
21395 do {
21396- seq = read_seqbegin(&tk->lock);
21397+ seq = read_seqcount_begin(&timekeeper_seq);
21398 ts->tv_sec = tk->xtime_sec;
21399 nsec = timekeeping_get_ns(tk);
21400 tomono = tk->wall_to_monotonic;
21401 sleep = tk->total_sleep_time;
21402
21403- } while (read_seqretry(&tk->lock, seq));
21404+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21405
21406 ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
21407 ts->tv_nsec = 0;
21408@@ -1315,10 +1345,10 @@ struct timespec current_kernel_time(void)
21409 unsigned long seq;
21410
21411 do {
21412- seq = read_seqbegin(&tk->lock);
21413+ seq = read_seqcount_begin(&timekeeper_seq);
21414
21415 now = tk_xtime(tk);
21416- } while (read_seqretry(&tk->lock, seq));
21417+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21418
21419 return now;
21420 }
21421@@ -1331,11 +1361,11 @@ struct timespec get_monotonic_coarse(void)
21422 unsigned long seq;
21423
21424 do {
21425- seq = read_seqbegin(&tk->lock);
21426+ seq = read_seqcount_begin(&timekeeper_seq);
21427
21428 now = tk_xtime(tk);
21429 mono = tk->wall_to_monotonic;
21430- } while (read_seqretry(&tk->lock, seq));
21431+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21432
21433 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
21434 now.tv_nsec + mono.tv_nsec);
21435@@ -1366,11 +1396,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
21436 unsigned long seq;
21437
21438 do {
21439- seq = read_seqbegin(&tk->lock);
21440+ seq = read_seqcount_begin(&timekeeper_seq);
21441 *xtim = tk_xtime(tk);
21442 *wtom = tk->wall_to_monotonic;
21443 *sleep = tk->total_sleep_time;
21444- } while (read_seqretry(&tk->lock, seq));
21445+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21446 }
21447
21448 #ifdef CONFIG_HIGH_RES_TIMERS
21449@@ -1390,14 +1420,14 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
21450 u64 secs, nsecs;
21451
21452 do {
21453- seq = read_seqbegin(&tk->lock);
21454+ seq = read_seqcount_begin(&timekeeper_seq);
21455
21456 secs = tk->xtime_sec;
21457 nsecs = timekeeping_get_ns(tk);
21458
21459 *offs_real = tk->offs_real;
21460 *offs_boot = tk->offs_boot;
21461- } while (read_seqretry(&tk->lock, seq));
21462+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21463
21464 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
21465 now = ktime_sub(now, *offs_real);
21466@@ -1415,9 +1445,9 @@ ktime_t ktime_get_monotonic_offset(void)
21467 struct timespec wtom;
21468
21469 do {
21470- seq = read_seqbegin(&tk->lock);
21471+ seq = read_seqcount_begin(&timekeeper_seq);
21472 wtom = tk->wall_to_monotonic;
21473- } while (read_seqretry(&tk->lock, seq));
21474+ } while (read_seqcount_retry(&timekeeper_seq, seq));
21475
21476 return timespec_to_ktime(wtom);
21477 }
21478@@ -1431,7 +1461,9 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
21479 */
21480 void xtime_update(unsigned long ticks)
21481 {
21482- write_seqlock(&jiffies_lock);
21483+ raw_spin_lock(&jiffies_lock);
21484+ write_seqcount_begin(&jiffies_seq);
21485 do_timer(ticks);
21486- write_sequnlock(&jiffies_lock);
21487+ write_seqcount_end(&jiffies_seq);
21488+ raw_spin_unlock(&jiffies_lock);
21489 }
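
All of the readers converted above (getnstimeofday(), ktime_get(), getrawmonotonic(), and so on) now follow the same lockless pattern: sample the seqcount, copy the fields, and retry if a writer ran in between. A self-contained userspace rendering of that read loop, with C11 atomics standing in for read_seqcount_begin()/read_seqcount_retry(); the names mirror the sketch after update_wall_time() and are illustrative only:

/* Build: cc -std=c11 -pthread seq_reader.c */
#include <stdatomic.h>
#include <stdio.h>

struct tk_state { long long xtime_sec; long xtime_nsec; };

static struct tk_state tk_live;   /* updated by a writer as sketched above */
static atomic_uint tk_seq;        /* odd while an update is in flight      */

static struct tk_state tk_read(void)
{
	struct tk_state snap;
	unsigned int seq;

	do {
		/* Wait for an even (stable) sequence, then take a snapshot. */
		do {
			seq = atomic_load_explicit(&tk_seq, memory_order_acquire);
		} while (seq & 1);

		snap = tk_live;

		/* Retry if a writer bumped the sequence while we copied. */
	} while (atomic_load_explicit(&tk_seq, memory_order_acquire) != seq);

	return snap;
}

int main(void)
{
	struct tk_state now = tk_read();
	printf("%lld.%09ld\n", now.xtime_sec, now.xtime_nsec);
	return 0;
}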
21490diff --git a/kernel/timer.c b/kernel/timer.c
21491index 367d008..374e7b1 100644
21492--- a/kernel/timer.c
21493+++ b/kernel/timer.c
21494@@ -76,6 +76,7 @@ struct tvec_root {
21495 struct tvec_base {
21496 spinlock_t lock;
21497 struct timer_list *running_timer;
21498+ wait_queue_head_t wait_for_running_timer;
21499 unsigned long timer_jiffies;
21500 unsigned long next_timer;
21501 unsigned long active_timers;
21502@@ -716,6 +717,36 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
21503 }
21504 }
21505
21506+#ifndef CONFIG_PREEMPT_RT_FULL
21507+static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
21508+ struct tvec_base *old,
21509+ struct tvec_base *new)
21510+{
21511+ /* See the comment in lock_timer_base() */
21512+ timer_set_base(timer, NULL);
21513+ spin_unlock(&old->lock);
21514+ spin_lock(&new->lock);
21515+ timer_set_base(timer, new);
21516+ return new;
21517+}
21518+#else
21519+static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
21520+ struct tvec_base *old,
21521+ struct tvec_base *new)
21522+{
21523+ /*
21524+ * We cannot do the above because we might be preempted and
21525+ * then the preempter would see NULL and loop forever.
21526+ */
21527+ if (spin_trylock(&new->lock)) {
21528+ timer_set_base(timer, new);
21529+ spin_unlock(&old->lock);
21530+ return new;
21531+ }
21532+ return old;
21533+}
21534+#endif
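
The PREEMPT_RT variant of switch_timer_base() above trades the "set base to NULL, drop, reacquire" dance for a trylock: if the new base's lock cannot be taken immediately, the timer simply stays on its old base. A rough userspace analogue of that migrate-only-if-free policy, using pthread trylock; the structures are invented for the sketch:

/* Illustrative trylock-or-stay migration, not the kernel code itself. */
#include <pthread.h>

struct item_base {
	pthread_mutex_t lock;
	int nitems;
};

struct item {
	struct item_base *base;
};

/* Caller holds old->lock.  Returns the base whose lock is held on exit. */
static struct item_base *switch_base(struct item *it,
				     struct item_base *old,
				     struct item_base *new)
{
	if (pthread_mutex_trylock(&new->lock) == 0) {
		/* Got the new base: move the item and release the old lock. */
		old->nitems--;
		it->base = new;
		new->nitems++;
		pthread_mutex_unlock(&old->lock);
		return new;
	}
	/* Contended: leave the item where it is rather than risk blocking. */
	return old;
}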
21535+
21536 static inline int
21537 __mod_timer(struct timer_list *timer, unsigned long expires,
21538 bool pending_only, int pinned)
21539@@ -735,12 +766,15 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
21540
21541 debug_activate(timer, expires);
21542
21543+ preempt_disable_rt();
21544 cpu = smp_processor_id();
21545
21546 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
21547 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
21548 cpu = get_nohz_timer_target();
21549 #endif
21550+ preempt_enable_rt();
21551+
21552 new_base = per_cpu(tvec_bases, cpu);
21553
21554 if (base != new_base) {
21555@@ -751,14 +785,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
21556 * handler yet has not finished. This also guarantees that
21557 * the timer is serialized wrt itself.
21558 */
21559- if (likely(base->running_timer != timer)) {
21560- /* See the comment in lock_timer_base() */
21561- timer_set_base(timer, NULL);
21562- spin_unlock(&base->lock);
21563- base = new_base;
21564- spin_lock(&base->lock);
21565- timer_set_base(timer, base);
21566- }
21567+ if (likely(base->running_timer != timer))
21568+ base = switch_timer_base(timer, base, new_base);
21569 }
21570
21571 timer->expires = expires;
21572@@ -941,6 +969,29 @@ void add_timer_on(struct timer_list *timer, int cpu)
21573 }
21574 EXPORT_SYMBOL_GPL(add_timer_on);
21575
21576+#ifdef CONFIG_PREEMPT_RT_FULL
21577+/*
21578+ * Wait for a running timer
21579+ */
21580+static void wait_for_running_timer(struct timer_list *timer)
21581+{
21582+ struct tvec_base *base = timer->base;
21583+
21584+ if (base->running_timer == timer)
21585+ wait_event(base->wait_for_running_timer,
21586+ base->running_timer != timer);
21587+}
21588+
21589+# define wakeup_timer_waiters(b) wake_up(&(b)->wait_for_running_timer)
21590+#else
21591+static inline void wait_for_running_timer(struct timer_list *timer)
21592+{
21593+ cpu_relax();
21594+}
21595+
21596+# define wakeup_timer_waiters(b) do { } while (0)
21597+#endif
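
On PREEMPT_RT, del_timer_sync() must not spin while the callback runs elsewhere (the spinning task could have a higher priority and starve it), so the patch adds a wait queue and wakes it after the callback returns. A pthread condition-variable sketch of that wait/wake pairing; the names are made up for the example:

/* Sketch of wait_for_running_timer() and its wakeup, using a condvar. */
#include <pthread.h>

struct cb_base {
	pthread_mutex_t lock;
	pthread_cond_t  done;      /* ~ the wait_for_running_timer waitqueue */
	void           *running;   /* callback currently executing, or NULL  */
};

/* Callback side: publish "running", run the handler, then wake waiters. */
static void run_callback(struct cb_base *b, void *cb, void (*fn)(void *))
{
	pthread_mutex_lock(&b->lock);
	b->running = cb;
	pthread_mutex_unlock(&b->lock);

	fn(cb);                    /* handler runs without the base lock */

	pthread_mutex_lock(&b->lock);
	b->running = NULL;
	pthread_cond_broadcast(&b->done);  /* ~ wake_up() in __run_timers() */
	pthread_mutex_unlock(&b->lock);
}

/* Cancel side: sleep until the handler has finished instead of spinning. */
static void wait_for_running(struct cb_base *b, void *cb)
{
	pthread_mutex_lock(&b->lock);
	while (b->running == cb)
		pthread_cond_wait(&b->done, &b->lock);
	pthread_mutex_unlock(&b->lock);
}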
21598+
21599 /**
21600  * del_timer - deactivate a timer.
21601 * @timer: the timer to be deactivated
21602@@ -998,7 +1049,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
21603 }
21604 EXPORT_SYMBOL(try_to_del_timer_sync);
21605
21606-#ifdef CONFIG_SMP
21607+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
21608 /**
21609 * del_timer_sync - deactivate a timer and wait for the handler to finish.
21610 * @timer: the timer to be deactivated
21611@@ -1058,7 +1109,7 @@ int del_timer_sync(struct timer_list *timer)
21612 int ret = try_to_del_timer_sync(timer);
21613 if (ret >= 0)
21614 return ret;
21615- cpu_relax();
21616+ wait_for_running_timer(timer);
21617 }
21618 }
21619 EXPORT_SYMBOL(del_timer_sync);
21620@@ -1175,15 +1226,17 @@ static inline void __run_timers(struct tvec_base *base)
21621 if (irqsafe) {
21622 spin_unlock(&base->lock);
21623 call_timer_fn(timer, fn, data);
21624+ base->running_timer = NULL;
21625 spin_lock(&base->lock);
21626 } else {
21627 spin_unlock_irq(&base->lock);
21628 call_timer_fn(timer, fn, data);
21629+ base->running_timer = NULL;
21630 spin_lock_irq(&base->lock);
21631 }
21632 }
21633 }
21634- base->running_timer = NULL;
21635+ wake_up(&base->wait_for_running_timer);
21636 spin_unlock_irq(&base->lock);
21637 }
21638
21639@@ -1323,17 +1376,31 @@ unsigned long get_next_timer_interrupt(unsigned long now)
21640 if (cpu_is_offline(smp_processor_id()))
21641 return expires;
21642
21643+#ifdef CONFIG_PREEMPT_RT_FULL
21644+ /*
21645+ * On PREEMPT_RT we cannot sleep here. If the trylock does not
21646+ * succeed then we return the worst-case 'expires in 1 tick'
21647+ * value. We use the rt functions here directly to avoid a
21648+ * migrate_disable() call.
21649+ */
21650+ if (!spin_do_trylock(&base->lock))
21651+ return now + 1;
21652+#else
21653 spin_lock(&base->lock);
21654+#endif
21655 if (base->active_timers) {
21656 if (time_before_eq(base->next_timer, base->timer_jiffies))
21657 base->next_timer = __next_timer_interrupt(base);
21658 expires = base->next_timer;
21659 }
21660+#ifdef CONFIG_PREEMPT_RT_FULL
21661+ rt_spin_unlock(&base->lock);
21662+#else
21663 spin_unlock(&base->lock);
21664+#endif
21665
21666 if (time_before_eq(expires, now))
21667 return now;
21668-
21669 return cmp_next_hrtimer_event(now, expires);
21670 }
21671 #endif
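
get_next_timer_interrupt() above applies the same RT rule: never block in this path, and if the base lock is contended just report the pessimistic "next event in one tick". A tiny illustrative helper showing that trylock-or-worst-case shape (plain C, made-up types):

#include <pthread.h>

struct ev_base {
	pthread_mutex_t lock;
	unsigned long   next_event;   /* absolute jiffy of the next event */
};

/* Returns the next expiry, or a worst-case "now + 1" when contended. */
static unsigned long next_event_or_worst_case(struct ev_base *b,
					      unsigned long now)
{
	unsigned long expires;

	if (pthread_mutex_trylock(&b->lock) != 0)
		return now + 1;       /* cannot sleep here: assume 1 tick */

	expires = b->next_event;
	pthread_mutex_unlock(&b->lock);
	return expires;
}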
21672@@ -1349,14 +1416,13 @@ void update_process_times(int user_tick)
21673
21674 /* Note: this timer irq context must be accounted for as well. */
21675 account_process_tick(p, user_tick);
21676+ scheduler_tick();
21677 run_local_timers();
21678 rcu_check_callbacks(cpu, user_tick);
21679- printk_tick();
21680-#ifdef CONFIG_IRQ_WORK
21681+#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
21682 if (in_irq())
21683 irq_work_run();
21684 #endif
21685- scheduler_tick();
21686 run_posix_cpu_timers(p);
21687 }
21688
21689@@ -1367,6 +1433,11 @@ static void run_timer_softirq(struct softirq_action *h)
21690 {
21691 struct tvec_base *base = __this_cpu_read(tvec_bases);
21692
21693+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
21694+ irq_work_run();
21695+#endif
21696+
21697+ printk_tick();
21698 hrtimer_run_pending();
21699
21700 if (time_after_eq(jiffies, base->timer_jiffies))
21701@@ -1684,6 +1755,7 @@ static int __cpuinit init_timers_cpu(int cpu)
21702 }
21703
21704 spin_lock_init(&base->lock);
21705+ init_waitqueue_head(&base->wait_for_running_timer);
21706
21707 for (j = 0; j < TVN_SIZE; j++) {
21708 INIT_LIST_HEAD(base->tv5.vec + j);
21709@@ -1722,7 +1794,7 @@ static void __cpuinit migrate_timers(int cpu)
21710
21711 BUG_ON(cpu_online(cpu));
21712 old_base = per_cpu(tvec_bases, cpu);
21713- new_base = get_cpu_var(tvec_bases);
21714+ new_base = get_local_var(tvec_bases);
21715 /*
21716 * The caller is globally serialized and nobody else
21717 * takes two locks at once, deadlock is not possible.
21718@@ -1743,7 +1815,7 @@ static void __cpuinit migrate_timers(int cpu)
21719
21720 spin_unlock(&old_base->lock);
21721 spin_unlock_irq(&new_base->lock);
21722- put_cpu_var(tvec_bases);
21723+ put_local_var(tvec_bases);
21724 }
21725 #endif /* CONFIG_HOTPLUG_CPU */
21726
21727diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
21728index 2747967..c872f5f 100644
21729--- a/kernel/trace/Kconfig
21730+++ b/kernel/trace/Kconfig
21731@@ -202,6 +202,24 @@ config IRQSOFF_TRACER
21732 enabled. This option and the preempt-off timing option can be
21733 used together or separately.)
21734
21735+config INTERRUPT_OFF_HIST
21736+ bool "Interrupts-off Latency Histogram"
21737+ depends on IRQSOFF_TRACER
21738+ help
21739+ This option generates continuously updated histograms (one per cpu)
21740+ of the duration of time periods with interrupts disabled. The
21741+ histograms are disabled by default. To enable them, write a non-zero
21742+ number to
21743+
21744+ /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
21745+
21746+ If PREEMPT_OFF_HIST is also selected, additional histograms (one
21747+ per cpu) are generated that accumulate the duration of time periods
21748+ when both interrupts and preemption are disabled. The histogram data
21749+ will be located in the debug file system at
21750+
21751+ /sys/kernel/debug/tracing/latency_hist/irqsoff
21752+
21753 config PREEMPT_TRACER
21754 bool "Preemption-off Latency Tracer"
21755 default n
21756@@ -224,6 +242,24 @@ config PREEMPT_TRACER
21757 enabled. This option and the irqs-off timing option can be
21758 used together or separately.)
21759
21760+config PREEMPT_OFF_HIST
21761+ bool "Preemption-off Latency Histogram"
21762+ depends on PREEMPT_TRACER
21763+ help
21764+ This option generates continuously updated histograms (one per cpu)
21765+ of the duration of time periods with preemption disabled. The
21766+ histograms are disabled by default. To enable them, write a non-zero
21767+ number to
21768+
21769+ /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
21770+
21771+ If INTERRUPT_OFF_HIST is also selected, additional histograms (one
21772+ per cpu) are generated that accumulate the duration of time periods
21773+ when both interrupts and preemption are disabled. The histogram data
21774+ will be located in the debug file system at
21775+
21776+ /sys/kernel/debug/tracing/latency_hist/preemptoff
21777+
21778 config SCHED_TRACER
21779 bool "Scheduling Latency Tracer"
21780 select GENERIC_TRACER
21781@@ -233,6 +269,74 @@ config SCHED_TRACER
21782 This tracer tracks the latency of the highest priority task
21783 to be scheduled in, starting from the point it has woken up.
21784
21785+config WAKEUP_LATENCY_HIST
21786+ bool "Scheduling Latency Histogram"
21787+ depends on SCHED_TRACER
21788+ help
21789+ This option generates continuously updated histograms (one per cpu)
21790+ of the scheduling latency of the highest priority task.
21791+ The histograms are disabled by default. To enable them, write a
21792+ non-zero number to
21793+
21794+ /sys/kernel/debug/tracing/latency_hist/enable/wakeup
21795+
21796+ Two different algorithms are used, one to determine the latency of
21797+ processes that exclusively use the highest priority of the system and
21798+ another one to determine the latency of processes that share the
21799+ highest system priority with other processes. The former is used to
21800+ improve hardware and system software, the latter to optimize the
21801+ priority design of a given system. The histogram data will be
21802+ located in the debug file system at
21803+
21804+ /sys/kernel/debug/tracing/latency_hist/wakeup
21805+
21806+ and
21807+
21808+ /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
21809+
21810+ If both Scheduling Latency Histogram and Missed Timer Offsets
21811+ Histogram are selected, additional histogram data will be collected
21812+ that contain, in addition to the wakeup latency, the timer latency, in
21813+ case the wakeup was triggered by an expired timer. These histograms
21814+ are available in the
21815+
21816+ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
21817+
21818+ directory. They reflect the apparent interrupt and scheduling latency
21819+	  and are best suited to determine the worst-case latency of a given
21820+ system. To enable these histograms, write a non-zero number to
21821+
21822+ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
21823+
21824+config MISSED_TIMER_OFFSETS_HIST
21825+ depends on HIGH_RES_TIMERS
21826+ select GENERIC_TRACER
21827+ bool "Missed Timer Offsets Histogram"
21828+ help
21829+ Generate a histogram of missed timer offsets in microseconds. The
21830+ histograms are disabled by default. To enable them, write a non-zero
21831+ number to
21832+
21833+ /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
21834+
21835+ The histogram data will be located in the debug file system at
21836+
21837+ /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
21838+
21839+ If both Scheduling Latency Histogram and Missed Timer Offsets
21840+ Histogram are selected, additional histogram data will be collected
21841+ that contain, in addition to the wakeup latency, the timer latency, in
21842+ case the wakeup was triggered by an expired timer. These histograms
21843+ are available in the
21844+
21845+ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
21846+
21847+ directory. They reflect the apparent interrupt and scheduling latency
21848+	  and are best suited to determine the worst-case latency of a given
21849+ system. To enable these histograms, write a non-zero number to
21850+
21851+ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
21852+
21853 config ENABLE_DEFAULT_TRACERS
21854 bool "Trace process context switches and events"
21855 depends on !GENERIC_TRACER
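
The help texts above describe the same workflow for every histogram type: write a non-zero number to /sys/kernel/debug/tracing/latency_hist/enable/<type>, then read the per-CPU histogram files under the corresponding directory. A minimal user-space C helper following that recipe for the wakeup histogram; the per-CPU file name ("CPU0") is an assumption about the layout created by latency_hist.c, not something stated in the help texts:

#include <stdio.h>

#define LAT_DIR "/sys/kernel/debug/tracing/latency_hist"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f;

	/* Enable wakeup-latency histograms (non-zero enables, 0 disables). */
	if (write_str(LAT_DIR "/enable/wakeup", "1") != 0) {
		perror("enable/wakeup");
		return 1;
	}

	/* Dump one per-CPU histogram: "<usecs> <samples>" per line. */
	f = fopen(LAT_DIR "/wakeup/CPU0", "r");   /* assumed file name */
	if (!f) {
		perror("wakeup/CPU0");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}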
21856diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
21857index d7e2068..f5e0243 100644
21858--- a/kernel/trace/Makefile
21859+++ b/kernel/trace/Makefile
21860@@ -34,6 +34,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
21861 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
21862 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21863 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
21864+obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
21865+obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
21866+obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
21867+obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
21868 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
21869 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
21870 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
21871diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
21872new file mode 100644
21873index 0000000..6a4c869
21874--- /dev/null
21875+++ b/kernel/trace/latency_hist.c
21876@@ -0,0 +1,1176 @@
21877+/*
21878+ * kernel/trace/latency_hist.c
21879+ *
21880+ * Add support for histograms of preemption-off latency and
21881+ * interrupt-off latency and wakeup latency, it depends on
21882+ * Real-Time Preemption Support.
21883+ *
21884+ * Copyright (C) 2005 MontaVista Software, Inc.
21885+ * Yi Yang <yyang@ch.mvista.com>
21886+ *
21887+ * Converted to work with the new latency tracer.
21888+ * Copyright (C) 2008 Red Hat, Inc.
21889+ * Steven Rostedt <srostedt@redhat.com>
21890+ *
21891+ */
21892+#include <linux/module.h>
21893+#include <linux/debugfs.h>
21894+#include <linux/seq_file.h>
21895+#include <linux/percpu.h>
21896+#include <linux/kallsyms.h>
21897+#include <linux/uaccess.h>
21898+#include <linux/sched.h>
21899+#include <linux/slab.h>
21900+#include <asm/atomic.h>
21901+#include <asm/div64.h>
21902+
21903+#include "trace.h"
21904+#include <trace/events/sched.h>
21905+
21906+#define NSECS_PER_USECS 1000L
21907+
21908+#define CREATE_TRACE_POINTS
21909+#include <trace/events/hist.h>
21910+
21911+enum {
21912+ IRQSOFF_LATENCY = 0,
21913+ PREEMPTOFF_LATENCY,
21914+ PREEMPTIRQSOFF_LATENCY,
21915+ WAKEUP_LATENCY,
21916+ WAKEUP_LATENCY_SHAREDPRIO,
21917+ MISSED_TIMER_OFFSETS,
21918+ TIMERANDWAKEUP_LATENCY,
21919+ MAX_LATENCY_TYPE,
21920+};
21921+
21922+#define MAX_ENTRY_NUM 10240
21923+
21924+struct hist_data {
21925+	atomic_t hist_mode; /* 0 don't log, 1 log */
21926+ long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
21927+ long min_lat;
21928+ long max_lat;
21929+ unsigned long long below_hist_bound_samples;
21930+ unsigned long long above_hist_bound_samples;
21931+ long long accumulate_lat;
21932+ unsigned long long total_samples;
21933+ unsigned long long hist_array[MAX_ENTRY_NUM];
21934+};
21935+
21936+struct enable_data {
21937+ int latency_type;
21938+ int enabled;
21939+};
21940+
21941+static char *latency_hist_dir_root = "latency_hist";
21942+
21943+#ifdef CONFIG_INTERRUPT_OFF_HIST
21944+static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
21945+static char *irqsoff_hist_dir = "irqsoff";
21946+static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
21947+static DEFINE_PER_CPU(int, hist_irqsoff_counting);
21948+#endif
21949+
21950+#ifdef CONFIG_PREEMPT_OFF_HIST
21951+static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
21952+static char *preemptoff_hist_dir = "preemptoff";
21953+static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
21954+static DEFINE_PER_CPU(int, hist_preemptoff_counting);
21955+#endif
21956+
21957+#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
21958+static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
21959+static char *preemptirqsoff_hist_dir = "preemptirqsoff";
21960+static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
21961+static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
21962+#endif
21963+
21964+#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
21965+static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
21966+static struct enable_data preemptirqsoff_enabled_data = {
21967+ .latency_type = PREEMPTIRQSOFF_LATENCY,
21968+ .enabled = 0,
21969+};
21970+#endif
21971+
21972+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
21973+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
21974+struct maxlatproc_data {
21975+ char comm[FIELD_SIZEOF(struct task_struct, comm)];
21976+ char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
21977+ int pid;
21978+ int current_pid;
21979+ int prio;
21980+ int current_prio;
21981+ long latency;
21982+ long timeroffset;
21983+ cycle_t timestamp;
21984+};
21985+#endif
21986+
21987+#ifdef CONFIG_WAKEUP_LATENCY_HIST
21988+static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
21989+static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
21990+static char *wakeup_latency_hist_dir = "wakeup";
21991+static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
21992+static notrace void probe_wakeup_latency_hist_start(void *v,
21993+ struct task_struct *p, int success);
21994+static notrace void probe_wakeup_latency_hist_stop(void *v,
21995+ struct task_struct *prev, struct task_struct *next);
21996+static notrace void probe_sched_migrate_task(void *,
21997+ struct task_struct *task, int cpu);
21998+static struct enable_data wakeup_latency_enabled_data = {
21999+ .latency_type = WAKEUP_LATENCY,
22000+ .enabled = 0,
22001+};
22002+static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
22003+static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
22004+static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
22005+static DEFINE_PER_CPU(int, wakeup_sharedprio);
22006+static unsigned long wakeup_pid;
22007+#endif
22008+
22009+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22010+static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
22011+static char *missed_timer_offsets_dir = "missed_timer_offsets";
22012+static notrace void probe_hrtimer_interrupt(void *v, int cpu,
22013+ long long offset, struct task_struct *curr, struct task_struct *task);
22014+static struct enable_data missed_timer_offsets_enabled_data = {
22015+ .latency_type = MISSED_TIMER_OFFSETS,
22016+ .enabled = 0,
22017+};
22018+static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
22019+static unsigned long missed_timer_offsets_pid;
22020+#endif
22021+
22022+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
22023+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22024+static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
22025+static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
22026+static struct enable_data timerandwakeup_enabled_data = {
22027+ .latency_type = TIMERANDWAKEUP_LATENCY,
22028+ .enabled = 0,
22029+};
22030+static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
22031+#endif
22032+
22033+void notrace latency_hist(int latency_type, int cpu, long latency,
22034+ long timeroffset, cycle_t stop,
22035+ struct task_struct *p)
22036+{
22037+ struct hist_data *my_hist;
22038+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22039+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22040+ struct maxlatproc_data *mp = NULL;
22041+#endif
22042+
22043+ if (cpu < 0 || cpu >= NR_CPUS || latency_type < 0 ||
22044+ latency_type >= MAX_LATENCY_TYPE)
22045+ return;
22046+
22047+ switch (latency_type) {
22048+#ifdef CONFIG_INTERRUPT_OFF_HIST
22049+ case IRQSOFF_LATENCY:
22050+ my_hist = &per_cpu(irqsoff_hist, cpu);
22051+ break;
22052+#endif
22053+#ifdef CONFIG_PREEMPT_OFF_HIST
22054+ case PREEMPTOFF_LATENCY:
22055+ my_hist = &per_cpu(preemptoff_hist, cpu);
22056+ break;
22057+#endif
22058+#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
22059+ case PREEMPTIRQSOFF_LATENCY:
22060+ my_hist = &per_cpu(preemptirqsoff_hist, cpu);
22061+ break;
22062+#endif
22063+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22064+ case WAKEUP_LATENCY:
22065+ my_hist = &per_cpu(wakeup_latency_hist, cpu);
22066+ mp = &per_cpu(wakeup_maxlatproc, cpu);
22067+ break;
22068+ case WAKEUP_LATENCY_SHAREDPRIO:
22069+ my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
22070+ mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
22071+ break;
22072+#endif
22073+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22074+ case MISSED_TIMER_OFFSETS:
22075+ my_hist = &per_cpu(missed_timer_offsets, cpu);
22076+ mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
22077+ break;
22078+#endif
22079+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
22080+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22081+ case TIMERANDWAKEUP_LATENCY:
22082+ my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
22083+ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
22084+ break;
22085+#endif
22086+
22087+ default:
22088+ return;
22089+ }
22090+
22091+ latency += my_hist->offset;
22092+
22093+ if (atomic_read(&my_hist->hist_mode) == 0)
22094+ return;
22095+
22096+ if (latency < 0 || latency >= MAX_ENTRY_NUM) {
22097+ if (latency < 0)
22098+ my_hist->below_hist_bound_samples++;
22099+ else
22100+ my_hist->above_hist_bound_samples++;
22101+ } else
22102+ my_hist->hist_array[latency]++;
22103+
22104+ if (unlikely(latency > my_hist->max_lat ||
22105+ my_hist->min_lat == LONG_MAX)) {
22106+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22107+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22108+ if (latency_type == WAKEUP_LATENCY ||
22109+ latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
22110+ latency_type == MISSED_TIMER_OFFSETS ||
22111+ latency_type == TIMERANDWAKEUP_LATENCY) {
22112+ strncpy(mp->comm, p->comm, sizeof(mp->comm));
22113+ strncpy(mp->current_comm, current->comm,
22114+ sizeof(mp->current_comm));
22115+ mp->pid = task_pid_nr(p);
22116+ mp->current_pid = task_pid_nr(current);
22117+ mp->prio = p->prio;
22118+ mp->current_prio = current->prio;
22119+ mp->latency = latency;
22120+ mp->timeroffset = timeroffset;
22121+ mp->timestamp = stop;
22122+ }
22123+#endif
22124+ my_hist->max_lat = latency;
22125+ }
22126+ if (unlikely(latency < my_hist->min_lat))
22127+ my_hist->min_lat = latency;
22128+ my_hist->total_samples++;
22129+ my_hist->accumulate_lat += latency;
22130+}
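
latency_hist() above is a fixed-size integer histogram: shift the sample by the configured offset, count out-of-range samples separately, and keep min/max/average bookkeeping on the side. The same logic reduced to a self-contained function (names are local to this sketch, not the kernel code):

#define HIST_ENTRIES 10240   /* mirrors MAX_ENTRY_NUM */

struct hist {
	long offset;                              /* MAX_ENTRY_NUM/2 for a bipolar scale */
	long min, max;
	long long sum;
	unsigned long long below, above, total;
	unsigned long long bucket[HIST_ENTRIES];
};

static void hist_add(struct hist *h, long sample_us)
{
	long idx = sample_us + h->offset;

	if (idx < 0)
		h->below++;                       /* under the histogram range */
	else if (idx >= HIST_ENTRIES)
		h->above++;                       /* over the histogram range  */
	else
		h->bucket[idx]++;

	if (h->total == 0 || idx > h->max)
		h->max = idx;
	if (h->total == 0 || idx < h->min)
		h->min = idx;
	h->total++;
	h->sum += idx;                            /* average = sum/total - offset */
}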
22131+
22132+static void *l_start(struct seq_file *m, loff_t *pos)
22133+{
22134+ loff_t *index_ptr = NULL;
22135+ loff_t index = *pos;
22136+ struct hist_data *my_hist = m->private;
22137+
22138+ if (index == 0) {
22139+ char minstr[32], avgstr[32], maxstr[32];
22140+
22141+ atomic_dec(&my_hist->hist_mode);
22142+
22143+ if (likely(my_hist->total_samples)) {
22144+ long avg = (long) div64_s64(my_hist->accumulate_lat,
22145+ my_hist->total_samples);
22146+ snprintf(minstr, sizeof(minstr), "%ld",
22147+ my_hist->min_lat - my_hist->offset);
22148+ snprintf(avgstr, sizeof(avgstr), "%ld",
22149+ avg - my_hist->offset);
22150+ snprintf(maxstr, sizeof(maxstr), "%ld",
22151+ my_hist->max_lat - my_hist->offset);
22152+ } else {
22153+ strcpy(minstr, "<undef>");
22154+ strcpy(avgstr, minstr);
22155+ strcpy(maxstr, minstr);
22156+ }
22157+
22158+ seq_printf(m, "#Minimum latency: %s microseconds\n"
22159+ "#Average latency: %s microseconds\n"
22160+ "#Maximum latency: %s microseconds\n"
22161+ "#Total samples: %llu\n"
22162+ "#There are %llu samples lower than %ld"
22163+ " microseconds.\n"
22164+ "#There are %llu samples greater or equal"
22165+ " than %ld microseconds.\n"
22166+ "#usecs\t%16s\n",
22167+ minstr, avgstr, maxstr,
22168+ my_hist->total_samples,
22169+ my_hist->below_hist_bound_samples,
22170+ -my_hist->offset,
22171+ my_hist->above_hist_bound_samples,
22172+ MAX_ENTRY_NUM - my_hist->offset,
22173+ "samples");
22174+ }
22175+ if (index < MAX_ENTRY_NUM) {
22176+ index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
22177+ if (index_ptr)
22178+ *index_ptr = index;
22179+ }
22180+
22181+ return index_ptr;
22182+}
22183+
22184+static void *l_next(struct seq_file *m, void *p, loff_t *pos)
22185+{
22186+ loff_t *index_ptr = p;
22187+ struct hist_data *my_hist = m->private;
22188+
22189+ if (++*pos >= MAX_ENTRY_NUM) {
22190+ atomic_inc(&my_hist->hist_mode);
22191+ return NULL;
22192+ }
22193+ *index_ptr = *pos;
22194+ return index_ptr;
22195+}
22196+
22197+static void l_stop(struct seq_file *m, void *p)
22198+{
22199+ kfree(p);
22200+}
22201+
22202+static int l_show(struct seq_file *m, void *p)
22203+{
22204+ int index = *(loff_t *) p;
22205+ struct hist_data *my_hist = m->private;
22206+
22207+ seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
22208+ my_hist->hist_array[index]);
22209+ return 0;
22210+}
22211+
22212+static struct seq_operations latency_hist_seq_op = {
22213+ .start = l_start,
22214+ .next = l_next,
22215+ .stop = l_stop,
22216+ .show = l_show
22217+};
22218+
22219+static int latency_hist_open(struct inode *inode, struct file *file)
22220+{
22221+ int ret;
22222+
22223+ ret = seq_open(file, &latency_hist_seq_op);
22224+ if (!ret) {
22225+ struct seq_file *seq = file->private_data;
22226+ seq->private = inode->i_private;
22227+ }
22228+ return ret;
22229+}
22230+
22231+static struct file_operations latency_hist_fops = {
22232+ .open = latency_hist_open,
22233+ .read = seq_read,
22234+ .llseek = seq_lseek,
22235+ .release = seq_release,
22236+};
22237+
22238+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22239+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22240+static void clear_maxlatprocdata(struct maxlatproc_data *mp)
22241+{
22242+ mp->comm[0] = mp->current_comm[0] = '\0';
22243+ mp->prio = mp->current_prio = mp->pid = mp->current_pid =
22244+ mp->latency = mp->timeroffset = -1;
22245+ mp->timestamp = 0;
22246+}
22247+#endif
22248+
22249+static void hist_reset(struct hist_data *hist)
22250+{
22251+ atomic_dec(&hist->hist_mode);
22252+
22253+ memset(hist->hist_array, 0, sizeof(hist->hist_array));
22254+ hist->below_hist_bound_samples = 0ULL;
22255+ hist->above_hist_bound_samples = 0ULL;
22256+ hist->min_lat = LONG_MAX;
22257+ hist->max_lat = LONG_MIN;
22258+ hist->total_samples = 0ULL;
22259+ hist->accumulate_lat = 0LL;
22260+
22261+ atomic_inc(&hist->hist_mode);
22262+}
22263+
22264+static ssize_t
22265+latency_hist_reset(struct file *file, const char __user *a,
22266+ size_t size, loff_t *off)
22267+{
22268+ int cpu;
22269+ struct hist_data *hist = NULL;
22270+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22271+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22272+ struct maxlatproc_data *mp = NULL;
22273+#endif
22274+ off_t latency_type = (off_t) file->private_data;
22275+
22276+ for_each_online_cpu(cpu) {
22277+
22278+ switch (latency_type) {
22279+#ifdef CONFIG_PREEMPT_OFF_HIST
22280+ case PREEMPTOFF_LATENCY:
22281+ hist = &per_cpu(preemptoff_hist, cpu);
22282+ break;
22283+#endif
22284+#ifdef CONFIG_INTERRUPT_OFF_HIST
22285+ case IRQSOFF_LATENCY:
22286+ hist = &per_cpu(irqsoff_hist, cpu);
22287+ break;
22288+#endif
22289+#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
22290+ case PREEMPTIRQSOFF_LATENCY:
22291+ hist = &per_cpu(preemptirqsoff_hist, cpu);
22292+ break;
22293+#endif
22294+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22295+ case WAKEUP_LATENCY:
22296+ hist = &per_cpu(wakeup_latency_hist, cpu);
22297+ mp = &per_cpu(wakeup_maxlatproc, cpu);
22298+ break;
22299+ case WAKEUP_LATENCY_SHAREDPRIO:
22300+ hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
22301+ mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
22302+ break;
22303+#endif
22304+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22305+ case MISSED_TIMER_OFFSETS:
22306+ hist = &per_cpu(missed_timer_offsets, cpu);
22307+ mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
22308+ break;
22309+#endif
22310+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
22311+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22312+ case TIMERANDWAKEUP_LATENCY:
22313+ hist = &per_cpu(timerandwakeup_latency_hist, cpu);
22314+ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
22315+ break;
22316+#endif
22317+ }
22318+
22319+ hist_reset(hist);
22320+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22321+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22322+ if (latency_type == WAKEUP_LATENCY ||
22323+ latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
22324+ latency_type == MISSED_TIMER_OFFSETS ||
22325+ latency_type == TIMERANDWAKEUP_LATENCY)
22326+ clear_maxlatprocdata(mp);
22327+#endif
22328+ }
22329+
22330+ return size;
22331+}
22332+
22333+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22334+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22335+static ssize_t
22336+show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
22337+{
22338+ char buf[64];
22339+ int r;
22340+ unsigned long *this_pid = file->private_data;
22341+
22342+ r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
22343+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
22344+}
22345+
22346+static ssize_t do_pid(struct file *file, const char __user *ubuf,
22347+ size_t cnt, loff_t *ppos)
22348+{
22349+ char buf[64];
22350+ unsigned long pid;
22351+ unsigned long *this_pid = file->private_data;
22352+
22353+ if (cnt >= sizeof(buf))
22354+ return -EINVAL;
22355+
22356+ if (copy_from_user(&buf, ubuf, cnt))
22357+ return -EFAULT;
22358+
22359+ buf[cnt] = '\0';
22360+
22361+ if (strict_strtoul(buf, 10, &pid))
22362+ return(-EINVAL);
22363+
22364+ *this_pid = pid;
22365+
22366+ return cnt;
22367+}
22368+#endif
22369+
22370+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22371+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22372+static ssize_t
22373+show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
22374+{
22375+ int r;
22376+ struct maxlatproc_data *mp = file->private_data;
22377+ int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
22378+ unsigned long long t;
22379+ unsigned long usecs, secs;
22380+ char *buf;
22381+
22382+ if (mp->pid == -1 || mp->current_pid == -1) {
22383+ buf = "(none)\n";
22384+ return simple_read_from_buffer(ubuf, cnt, ppos, buf,
22385+ strlen(buf));
22386+ }
22387+
22388+ buf = kmalloc(strmaxlen, GFP_KERNEL);
22389+ if (buf == NULL)
22390+ return -ENOMEM;
22391+
22392+ t = ns2usecs(mp->timestamp);
22393+ usecs = do_div(t, USEC_PER_SEC);
22394+ secs = (unsigned long) t;
22395+ r = snprintf(buf, strmaxlen,
22396+ "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
22397+ MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
22398+ mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
22399+ secs, usecs);
22400+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
22401+ kfree(buf);
22402+ return r;
22403+}
22404+#endif
22405+
22406+static ssize_t
22407+show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
22408+{
22409+ char buf[64];
22410+ struct enable_data *ed = file->private_data;
22411+ int r;
22412+
22413+ r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
22414+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
22415+}
22416+
22417+static ssize_t
22418+do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
22419+{
22420+ char buf[64];
22421+ long enable;
22422+ struct enable_data *ed = file->private_data;
22423+
22424+ if (cnt >= sizeof(buf))
22425+ return -EINVAL;
22426+
22427+ if (copy_from_user(&buf, ubuf, cnt))
22428+ return -EFAULT;
22429+
22430+ buf[cnt] = 0;
22431+
22432+ if (strict_strtol(buf, 10, &enable))
22433+ return(-EINVAL);
22434+
22435+ if ((enable && ed->enabled) || (!enable && !ed->enabled))
22436+ return cnt;
22437+
22438+ if (enable) {
22439+ int ret;
22440+
22441+ switch (ed->latency_type) {
22442+#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
22443+ case PREEMPTIRQSOFF_LATENCY:
22444+ ret = register_trace_preemptirqsoff_hist(
22445+ probe_preemptirqsoff_hist, NULL);
22446+ if (ret) {
22447+ pr_info("wakeup trace: Couldn't assign "
22448+ "probe_preemptirqsoff_hist "
22449+ "to trace_preemptirqsoff_hist\n");
22450+ return ret;
22451+ }
22452+ break;
22453+#endif
22454+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22455+ case WAKEUP_LATENCY:
22456+ ret = register_trace_sched_wakeup(
22457+ probe_wakeup_latency_hist_start, NULL);
22458+ if (ret) {
22459+ pr_info("wakeup trace: Couldn't assign "
22460+ "probe_wakeup_latency_hist_start "
22461+ "to trace_sched_wakeup\n");
22462+ return ret;
22463+ }
22464+ ret = register_trace_sched_wakeup_new(
22465+ probe_wakeup_latency_hist_start, NULL);
22466+ if (ret) {
22467+ pr_info("wakeup trace: Couldn't assign "
22468+ "probe_wakeup_latency_hist_start "
22469+ "to trace_sched_wakeup_new\n");
22470+ unregister_trace_sched_wakeup(
22471+ probe_wakeup_latency_hist_start, NULL);
22472+ return ret;
22473+ }
22474+ ret = register_trace_sched_switch(
22475+ probe_wakeup_latency_hist_stop, NULL);
22476+ if (ret) {
22477+ pr_info("wakeup trace: Couldn't assign "
22478+ "probe_wakeup_latency_hist_stop "
22479+ "to trace_sched_switch\n");
22480+ unregister_trace_sched_wakeup(
22481+ probe_wakeup_latency_hist_start, NULL);
22482+ unregister_trace_sched_wakeup_new(
22483+ probe_wakeup_latency_hist_start, NULL);
22484+ return ret;
22485+ }
22486+ ret = register_trace_sched_migrate_task(
22487+ probe_sched_migrate_task, NULL);
22488+ if (ret) {
22489+ pr_info("wakeup trace: Couldn't assign "
22490+ "probe_sched_migrate_task "
22491+ "to trace_sched_migrate_task\n");
22492+ unregister_trace_sched_wakeup(
22493+ probe_wakeup_latency_hist_start, NULL);
22494+ unregister_trace_sched_wakeup_new(
22495+ probe_wakeup_latency_hist_start, NULL);
22496+ unregister_trace_sched_switch(
22497+ probe_wakeup_latency_hist_stop, NULL);
22498+ return ret;
22499+ }
22500+ break;
22501+#endif
22502+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22503+ case MISSED_TIMER_OFFSETS:
22504+ ret = register_trace_hrtimer_interrupt(
22505+ probe_hrtimer_interrupt, NULL);
22506+ if (ret) {
22507+ pr_info("wakeup trace: Couldn't assign "
22508+ "probe_hrtimer_interrupt "
22509+ "to trace_hrtimer_interrupt\n");
22510+ return ret;
22511+ }
22512+ break;
22513+#endif
22514+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
22515+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22516+ case TIMERANDWAKEUP_LATENCY:
22517+ if (!wakeup_latency_enabled_data.enabled ||
22518+ !missed_timer_offsets_enabled_data.enabled)
22519+ return -EINVAL;
22520+ break;
22521+#endif
22522+ default:
22523+ break;
22524+ }
22525+ } else {
22526+ switch (ed->latency_type) {
22527+#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
22528+ case PREEMPTIRQSOFF_LATENCY:
22529+ {
22530+ int cpu;
22531+
22532+ unregister_trace_preemptirqsoff_hist(
22533+ probe_preemptirqsoff_hist, NULL);
22534+ for_each_online_cpu(cpu) {
22535+#ifdef CONFIG_INTERRUPT_OFF_HIST
22536+ per_cpu(hist_irqsoff_counting,
22537+ cpu) = 0;
22538+#endif
22539+#ifdef CONFIG_PREEMPT_OFF_HIST
22540+ per_cpu(hist_preemptoff_counting,
22541+ cpu) = 0;
22542+#endif
22543+#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
22544+ per_cpu(hist_preemptirqsoff_counting,
22545+ cpu) = 0;
22546+#endif
22547+ }
22548+ }
22549+ break;
22550+#endif
22551+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22552+ case WAKEUP_LATENCY:
22553+ {
22554+ int cpu;
22555+
22556+ unregister_trace_sched_wakeup(
22557+ probe_wakeup_latency_hist_start, NULL);
22558+ unregister_trace_sched_wakeup_new(
22559+ probe_wakeup_latency_hist_start, NULL);
22560+ unregister_trace_sched_switch(
22561+ probe_wakeup_latency_hist_stop, NULL);
22562+ unregister_trace_sched_migrate_task(
22563+ probe_sched_migrate_task, NULL);
22564+
22565+ for_each_online_cpu(cpu) {
22566+ per_cpu(wakeup_task, cpu) = NULL;
22567+ per_cpu(wakeup_sharedprio, cpu) = 0;
22568+ }
22569+ }
22570+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22571+ timerandwakeup_enabled_data.enabled = 0;
22572+#endif
22573+ break;
22574+#endif
22575+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22576+ case MISSED_TIMER_OFFSETS:
22577+ unregister_trace_hrtimer_interrupt(
22578+ probe_hrtimer_interrupt, NULL);
22579+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22580+ timerandwakeup_enabled_data.enabled = 0;
22581+#endif
22582+ break;
22583+#endif
22584+ default:
22585+ break;
22586+ }
22587+ }
22588+ ed->enabled = enable;
22589+ return cnt;
22590+}
22591+
22592+static const struct file_operations latency_hist_reset_fops = {
22593+ .open = tracing_open_generic,
22594+ .write = latency_hist_reset,
22595+};
22596+
22597+static const struct file_operations enable_fops = {
22598+ .open = tracing_open_generic,
22599+ .read = show_enable,
22600+ .write = do_enable,
22601+};
22602+
22603+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22604+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22605+static const struct file_operations pid_fops = {
22606+ .open = tracing_open_generic,
22607+ .read = show_pid,
22608+ .write = do_pid,
22609+};
22610+
22611+static const struct file_operations maxlatproc_fops = {
22612+ .open = tracing_open_generic,
22613+ .read = show_maxlatproc,
22614+};
22615+#endif
22616+
22617+#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
22618+static notrace void probe_preemptirqsoff_hist(void *v, int reason,
22619+ int starthist)
22620+{
22621+ int cpu = raw_smp_processor_id();
22622+ int time_set = 0;
22623+
22624+ if (starthist) {
22625+ cycle_t uninitialized_var(start);
22626+
22627+ if (!preempt_count() && !irqs_disabled())
22628+ return;
22629+
22630+#ifdef CONFIG_INTERRUPT_OFF_HIST
22631+ if ((reason == IRQS_OFF || reason == TRACE_START) &&
22632+ !per_cpu(hist_irqsoff_counting, cpu)) {
22633+ per_cpu(hist_irqsoff_counting, cpu) = 1;
22634+ start = ftrace_now(cpu);
22635+ time_set++;
22636+ per_cpu(hist_irqsoff_start, cpu) = start;
22637+ }
22638+#endif
22639+
22640+#ifdef CONFIG_PREEMPT_OFF_HIST
22641+ if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
22642+ !per_cpu(hist_preemptoff_counting, cpu)) {
22643+ per_cpu(hist_preemptoff_counting, cpu) = 1;
22644+ if (!(time_set++))
22645+ start = ftrace_now(cpu);
22646+ per_cpu(hist_preemptoff_start, cpu) = start;
22647+ }
22648+#endif
22649+
22650+#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
22651+ if (per_cpu(hist_irqsoff_counting, cpu) &&
22652+ per_cpu(hist_preemptoff_counting, cpu) &&
22653+ !per_cpu(hist_preemptirqsoff_counting, cpu)) {
22654+ per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
22655+ if (!time_set)
22656+ start = ftrace_now(cpu);
22657+ per_cpu(hist_preemptirqsoff_start, cpu) = start;
22658+ }
22659+#endif
22660+ } else {
22661+ cycle_t uninitialized_var(stop);
22662+
22663+#ifdef CONFIG_INTERRUPT_OFF_HIST
22664+ if ((reason == IRQS_ON || reason == TRACE_STOP) &&
22665+ per_cpu(hist_irqsoff_counting, cpu)) {
22666+ cycle_t start = per_cpu(hist_irqsoff_start, cpu);
22667+
22668+ stop = ftrace_now(cpu);
22669+ time_set++;
22670+ if (start) {
22671+ long latency = ((long) (stop - start)) /
22672+ NSECS_PER_USECS;
22673+
22674+ latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
22675+ stop, NULL);
22676+ }
22677+ per_cpu(hist_irqsoff_counting, cpu) = 0;
22678+ }
22679+#endif
22680+
22681+#ifdef CONFIG_PREEMPT_OFF_HIST
22682+ if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
22683+ per_cpu(hist_preemptoff_counting, cpu)) {
22684+ cycle_t start = per_cpu(hist_preemptoff_start, cpu);
22685+
22686+ if (!(time_set++))
22687+ stop = ftrace_now(cpu);
22688+ if (start) {
22689+ long latency = ((long) (stop - start)) /
22690+ NSECS_PER_USECS;
22691+
22692+ latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
22693+ 0, stop, NULL);
22694+ }
22695+ per_cpu(hist_preemptoff_counting, cpu) = 0;
22696+ }
22697+#endif
22698+
22699+#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
22700+ if ((!per_cpu(hist_irqsoff_counting, cpu) ||
22701+ !per_cpu(hist_preemptoff_counting, cpu)) &&
22702+ per_cpu(hist_preemptirqsoff_counting, cpu)) {
22703+ cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
22704+
22705+ if (!time_set)
22706+ stop = ftrace_now(cpu);
22707+ if (start) {
22708+ long latency = ((long) (stop - start)) /
22709+ NSECS_PER_USECS;
22710+
22711+ latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
22712+ latency, 0, stop, NULL);
22713+ }
22714+ per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
22715+ }
22716+#endif
22717+ }
22718+}
22719+#endif
22720+
22721+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22722+static DEFINE_RAW_SPINLOCK(wakeup_lock);
22723+static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
22724+ int cpu)
22725+{
22726+ int old_cpu = task_cpu(task);
22727+
22728+ if (cpu != old_cpu) {
22729+ unsigned long flags;
22730+ struct task_struct *cpu_wakeup_task;
22731+
22732+ raw_spin_lock_irqsave(&wakeup_lock, flags);
22733+
22734+ cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
22735+ if (task == cpu_wakeup_task) {
22736+ put_task_struct(cpu_wakeup_task);
22737+ per_cpu(wakeup_task, old_cpu) = NULL;
22738+ cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
22739+ get_task_struct(cpu_wakeup_task);
22740+ }
22741+
22742+ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
22743+ }
22744+}
22745+
22746+static notrace void probe_wakeup_latency_hist_start(void *v,
22747+ struct task_struct *p, int success)
22748+{
22749+ unsigned long flags;
22750+ struct task_struct *curr = current;
22751+ int cpu = task_cpu(p);
22752+ struct task_struct *cpu_wakeup_task;
22753+
22754+ raw_spin_lock_irqsave(&wakeup_lock, flags);
22755+
22756+ cpu_wakeup_task = per_cpu(wakeup_task, cpu);
22757+
22758+ if (wakeup_pid) {
22759+ if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
22760+ p->prio == curr->prio)
22761+ per_cpu(wakeup_sharedprio, cpu) = 1;
22762+ if (likely(wakeup_pid != task_pid_nr(p)))
22763+ goto out;
22764+ } else {
22765+ if (likely(!rt_task(p)) ||
22766+ (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
22767+ p->prio > curr->prio)
22768+ goto out;
22769+ if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
22770+ p->prio == curr->prio)
22771+ per_cpu(wakeup_sharedprio, cpu) = 1;
22772+ }
22773+
22774+ if (cpu_wakeup_task)
22775+ put_task_struct(cpu_wakeup_task);
22776+ cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
22777+ get_task_struct(cpu_wakeup_task);
22778+ cpu_wakeup_task->preempt_timestamp_hist =
22779+ ftrace_now(raw_smp_processor_id());
22780+out:
22781+ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
22782+}
22783+
22784+static notrace void probe_wakeup_latency_hist_stop(void *v,
22785+ struct task_struct *prev, struct task_struct *next)
22786+{
22787+ unsigned long flags;
22788+ int cpu = task_cpu(next);
22789+ long latency;
22790+ cycle_t stop;
22791+ struct task_struct *cpu_wakeup_task;
22792+
22793+ raw_spin_lock_irqsave(&wakeup_lock, flags);
22794+
22795+ cpu_wakeup_task = per_cpu(wakeup_task, cpu);
22796+
22797+ if (cpu_wakeup_task == NULL)
22798+ goto out;
22799+
22800+ /* Already running? */
22801+ if (unlikely(current == cpu_wakeup_task))
22802+ goto out_reset;
22803+
22804+ if (next != cpu_wakeup_task) {
22805+ if (next->prio < cpu_wakeup_task->prio)
22806+ goto out_reset;
22807+
22808+ if (next->prio == cpu_wakeup_task->prio)
22809+ per_cpu(wakeup_sharedprio, cpu) = 1;
22810+
22811+ goto out;
22812+ }
22813+
22814+ if (current->prio == cpu_wakeup_task->prio)
22815+ per_cpu(wakeup_sharedprio, cpu) = 1;
22816+
22817+ /*
22818+ * The task we are waiting for is about to be switched to.
22819+ * Calculate latency and store it in histogram.
22820+ */
22821+ stop = ftrace_now(raw_smp_processor_id());
22822+
22823+ latency = ((long) (stop - next->preempt_timestamp_hist)) /
22824+ NSECS_PER_USECS;
22825+
22826+ if (per_cpu(wakeup_sharedprio, cpu)) {
22827+ latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
22828+ next);
22829+ per_cpu(wakeup_sharedprio, cpu) = 0;
22830+ } else {
22831+ latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
22832+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22833+ if (timerandwakeup_enabled_data.enabled) {
22834+ latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
22835+ next->timer_offset + latency, next->timer_offset,
22836+ stop, next);
22837+ }
22838+#endif
22839+ }
22840+
22841+out_reset:
22842+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22843+ next->timer_offset = 0;
22844+#endif
22845+ put_task_struct(cpu_wakeup_task);
22846+ per_cpu(wakeup_task, cpu) = NULL;
22847+out:
22848+ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
22849+}
22850+#endif
22851+
22852+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22853+static notrace void probe_hrtimer_interrupt(void *v, int cpu,
22854+ long long latency_ns, struct task_struct *curr, struct task_struct *task)
22855+{
22856+ if (latency_ns <= 0 && task != NULL && rt_task(task) &&
22857+ (task->prio < curr->prio ||
22858+ (task->prio == curr->prio &&
22859+ !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
22860+ long latency;
22861+ cycle_t now;
22862+
22863+ if (missed_timer_offsets_pid) {
22864+ if (likely(missed_timer_offsets_pid !=
22865+ task_pid_nr(task)))
22866+ return;
22867+ }
22868+
22869+ now = ftrace_now(cpu);
22870+ latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
22871+ latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
22872+ task);
22873+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22874+ task->timer_offset = latency;
22875+#endif
22876+ }
22877+}
22878+#endif
22879+
22880+static __init int latency_hist_init(void)
22881+{
22882+ struct dentry *latency_hist_root = NULL;
22883+ struct dentry *dentry;
22884+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22885+ struct dentry *dentry_sharedprio;
22886+#endif
22887+ struct dentry *entry;
22888+ struct dentry *enable_root;
22889+ int i = 0;
22890+ struct hist_data *my_hist;
22891+ char name[64];
22892+ char *cpufmt = "CPU%d";
22893+#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
22894+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
22895+ char *cpufmt_maxlatproc = "max_latency-CPU%d";
22896+ struct maxlatproc_data *mp = NULL;
22897+#endif
22898+
22899+ dentry = tracing_init_dentry();
22900+ latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
22901+ enable_root = debugfs_create_dir("enable", latency_hist_root);
22902+
22903+#ifdef CONFIG_INTERRUPT_OFF_HIST
22904+ dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
22905+ for_each_possible_cpu(i) {
22906+ sprintf(name, cpufmt, i);
22907+ entry = debugfs_create_file(name, 0444, dentry,
22908+ &per_cpu(irqsoff_hist, i), &latency_hist_fops);
22909+ my_hist = &per_cpu(irqsoff_hist, i);
22910+ atomic_set(&my_hist->hist_mode, 1);
22911+ my_hist->min_lat = LONG_MAX;
22912+ }
22913+ entry = debugfs_create_file("reset", 0644, dentry,
22914+ (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
22915+#endif
22916+
22917+#ifdef CONFIG_PREEMPT_OFF_HIST
22918+ dentry = debugfs_create_dir(preemptoff_hist_dir,
22919+ latency_hist_root);
22920+ for_each_possible_cpu(i) {
22921+ sprintf(name, cpufmt, i);
22922+ entry = debugfs_create_file(name, 0444, dentry,
22923+ &per_cpu(preemptoff_hist, i), &latency_hist_fops);
22924+ my_hist = &per_cpu(preemptoff_hist, i);
22925+ atomic_set(&my_hist->hist_mode, 1);
22926+ my_hist->min_lat = LONG_MAX;
22927+ }
22928+ entry = debugfs_create_file("reset", 0644, dentry,
22929+ (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
22930+#endif
22931+
22932+#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
22933+ dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
22934+ latency_hist_root);
22935+ for_each_possible_cpu(i) {
22936+ sprintf(name, cpufmt, i);
22937+ entry = debugfs_create_file(name, 0444, dentry,
22938+ &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
22939+ my_hist = &per_cpu(preemptirqsoff_hist, i);
22940+ atomic_set(&my_hist->hist_mode, 1);
22941+ my_hist->min_lat = LONG_MAX;
22942+ }
22943+ entry = debugfs_create_file("reset", 0644, dentry,
22944+ (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
22945+#endif
22946+
22947+#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
22948+ entry = debugfs_create_file("preemptirqsoff", 0644,
22949+ enable_root, (void *)&preemptirqsoff_enabled_data,
22950+ &enable_fops);
22951+#endif
22952+
22953+#ifdef CONFIG_WAKEUP_LATENCY_HIST
22954+ dentry = debugfs_create_dir(wakeup_latency_hist_dir,
22955+ latency_hist_root);
22956+ dentry_sharedprio = debugfs_create_dir(
22957+ wakeup_latency_hist_dir_sharedprio, dentry);
22958+ for_each_possible_cpu(i) {
22959+ sprintf(name, cpufmt, i);
22960+
22961+ entry = debugfs_create_file(name, 0444, dentry,
22962+ &per_cpu(wakeup_latency_hist, i),
22963+ &latency_hist_fops);
22964+ my_hist = &per_cpu(wakeup_latency_hist, i);
22965+ atomic_set(&my_hist->hist_mode, 1);
22966+ my_hist->min_lat = LONG_MAX;
22967+
22968+ entry = debugfs_create_file(name, 0444, dentry_sharedprio,
22969+ &per_cpu(wakeup_latency_hist_sharedprio, i),
22970+ &latency_hist_fops);
22971+ my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
22972+ atomic_set(&my_hist->hist_mode, 1);
22973+ my_hist->min_lat = LONG_MAX;
22974+
22975+ sprintf(name, cpufmt_maxlatproc, i);
22976+
22977+ mp = &per_cpu(wakeup_maxlatproc, i);
22978+ entry = debugfs_create_file(name, 0444, dentry, mp,
22979+ &maxlatproc_fops);
22980+ clear_maxlatprocdata(mp);
22981+
22982+ mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
22983+ entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
22984+ &maxlatproc_fops);
22985+ clear_maxlatprocdata(mp);
22986+ }
22987+ entry = debugfs_create_file("pid", 0644, dentry,
22988+ (void *)&wakeup_pid, &pid_fops);
22989+ entry = debugfs_create_file("reset", 0644, dentry,
22990+ (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
22991+ entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
22992+ (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
22993+ entry = debugfs_create_file("wakeup", 0644,
22994+ enable_root, (void *)&wakeup_latency_enabled_data,
22995+ &enable_fops);
22996+#endif
22997+
22998+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
22999+ dentry = debugfs_create_dir(missed_timer_offsets_dir,
23000+ latency_hist_root);
23001+ for_each_possible_cpu(i) {
23002+ sprintf(name, cpufmt, i);
23003+ entry = debugfs_create_file(name, 0444, dentry,
23004+ &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
23005+ my_hist = &per_cpu(missed_timer_offsets, i);
23006+ atomic_set(&my_hist->hist_mode, 1);
23007+ my_hist->min_lat = LONG_MAX;
23008+
23009+ sprintf(name, cpufmt_maxlatproc, i);
23010+ mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
23011+ entry = debugfs_create_file(name, 0444, dentry, mp,
23012+ &maxlatproc_fops);
23013+ clear_maxlatprocdata(mp);
23014+ }
23015+ entry = debugfs_create_file("pid", 0644, dentry,
23016+ (void *)&missed_timer_offsets_pid, &pid_fops);
23017+ entry = debugfs_create_file("reset", 0644, dentry,
23018+ (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
23019+ entry = debugfs_create_file("missed_timer_offsets", 0644,
23020+ enable_root, (void *)&missed_timer_offsets_enabled_data,
23021+ &enable_fops);
23022+#endif
23023+
23024+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
23025+ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
23026+ dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
23027+ latency_hist_root);
23028+ for_each_possible_cpu(i) {
23029+ sprintf(name, cpufmt, i);
23030+ entry = debugfs_create_file(name, 0444, dentry,
23031+ &per_cpu(timerandwakeup_latency_hist, i),
23032+ &latency_hist_fops);
23033+ my_hist = &per_cpu(timerandwakeup_latency_hist, i);
23034+ atomic_set(&my_hist->hist_mode, 1);
23035+ my_hist->min_lat = LONG_MAX;
23036+
23037+ sprintf(name, cpufmt_maxlatproc, i);
23038+ mp = &per_cpu(timerandwakeup_maxlatproc, i);
23039+ entry = debugfs_create_file(name, 0444, dentry, mp,
23040+ &maxlatproc_fops);
23041+ clear_maxlatprocdata(mp);
23042+ }
23043+ entry = debugfs_create_file("reset", 0644, dentry,
23044+ (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
23045+ entry = debugfs_create_file("timerandwakeup", 0644,
23046+ enable_root, (void *)&timerandwakeup_enabled_data,
23047+ &enable_fops);
23048+#endif
23049+ return 0;
23050+}
23051+
23052+__initcall(latency_hist_init);
23053diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
23054index fe1d581..dfe9201 100644
23055--- a/kernel/trace/trace.c
23056+++ b/kernel/trace/trace.c
23057@@ -1167,6 +1167,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
23058 struct task_struct *tsk = current;
23059
23060 entry->preempt_count = pc & 0xff;
23061+ entry->preempt_lazy_count = preempt_lazy_count();
23062 entry->pid = (tsk) ? tsk->pid : 0;
23063 entry->padding = 0;
23064 entry->flags =
23065@@ -1177,7 +1178,10 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
23066 #endif
23067 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
23068 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
23069- (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
23070+ (need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
23071+ (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0);
23072+
23073+ entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
23074 }
23075 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
23076
23077@@ -2030,14 +2034,17 @@ get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *e
23078
23079 static void print_lat_help_header(struct seq_file *m)
23080 {
23081- seq_puts(m, "# _------=> CPU# \n");
23082- seq_puts(m, "# / _-----=> irqs-off \n");
23083- seq_puts(m, "# | / _----=> need-resched \n");
23084- seq_puts(m, "# || / _---=> hardirq/softirq \n");
23085- seq_puts(m, "# ||| / _--=> preempt-depth \n");
23086- seq_puts(m, "# |||| / delay \n");
23087- seq_puts(m, "# cmd pid ||||| time | caller \n");
23088- seq_puts(m, "# \\ / ||||| \\ | / \n");
23089+ seq_puts(m, "# _--------=> CPU# \n");
23090+ seq_puts(m, "# / _-------=> irqs-off \n");
23091+ seq_puts(m, "# | / _------=> need-resched \n");
23092+ seq_puts(m, "# || / _-----=> need-resched_lazy \n");
23093+ seq_puts(m, "# ||| / _----=> hardirq/softirq \n");
23094+ seq_puts(m, "# |||| / _---=> preempt-depth \n");
23095+ seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n");
23096+ seq_puts(m, "# |||||| / _-=> migrate-disable \n");
23097+ seq_puts(m, "# ||||||| / delay \n");
23098+ seq_puts(m, "# cmd pid |||||||| time | caller \n");
23099+ seq_puts(m, "# \\ / |||||||| \\ | / \n");
23100 }
23101
23102 static void print_event_info(struct trace_array *tr, struct seq_file *m)
23103@@ -2061,13 +2068,16 @@ static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
23104 static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
23105 {
23106 print_event_info(tr, m);
23107- seq_puts(m, "# _-----=> irqs-off\n");
23108- seq_puts(m, "# / _----=> need-resched\n");
23109- seq_puts(m, "# | / _---=> hardirq/softirq\n");
23110- seq_puts(m, "# || / _--=> preempt-depth\n");
23111- seq_puts(m, "# ||| / delay\n");
23112- seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
23113- seq_puts(m, "# | | | |||| | |\n");
23114+ seq_puts(m, "# _-------=> irqs-off \n");
23115+ seq_puts(m, "# / _------=> need-resched \n");
23116+ seq_puts(m, "# |/ _-----=> need-resched_lazy \n");
23117+ seq_puts(m, "# ||/ _----=> hardirq/softirq \n");
23118+ seq_puts(m, "# |||/ _---=> preempt-depth \n");
23119+ seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n");
23120+ seq_puts(m, "# ||||| / _-=> migrate-disable \n");
23121+ seq_puts(m, "# |||||| / delay\n");
23122+ seq_puts(m, "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n");
23123+ seq_puts(m, "# | | | ||||||| | |\n");
23124 }
23125
23126 void
23127diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
23128index 23f1d2c..15f4a31 100644
23129--- a/kernel/trace/trace.h
23130+++ b/kernel/trace/trace.h
23131@@ -116,6 +116,7 @@ struct uprobe_trace_entry_head {
23132 * NEED_RESCHED - reschedule is requested
23133 * HARDIRQ - inside an interrupt handler
23134 * SOFTIRQ - inside a softirq handler
23135+ * NEED_RESCHED_LAZY - lazy reschedule is requested
23136 */
23137 enum trace_flag_type {
23138 TRACE_FLAG_IRQS_OFF = 0x01,
23139@@ -123,6 +124,7 @@ enum trace_flag_type {
23140 TRACE_FLAG_NEED_RESCHED = 0x04,
23141 TRACE_FLAG_HARDIRQ = 0x08,
23142 TRACE_FLAG_SOFTIRQ = 0x10,
23143+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20,
23144 };
23145
23146 #define TRACE_BUF_SIZE 1024
23147diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
23148index 880073d..a45a22d 100644
23149--- a/kernel/trace/trace_events.c
23150+++ b/kernel/trace/trace_events.c
23151@@ -116,7 +116,8 @@ static int trace_define_common_fields(void)
23152 __common_field(unsigned char, flags);
23153 __common_field(unsigned char, preempt_count);
23154 __common_field(int, pid);
23155- __common_field(int, padding);
23156+ __common_field(unsigned short, migrate_disable);
23157+ __common_field(unsigned short, padding);
23158
23159 return ret;
23160 }
23161diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
23162index 443b25b..f283bd0 100644
23163--- a/kernel/trace/trace_irqsoff.c
23164+++ b/kernel/trace/trace_irqsoff.c
23165@@ -17,6 +17,7 @@
23166 #include <linux/fs.h>
23167
23168 #include "trace.h"
23169+#include <trace/events/hist.h>
23170
23171 static struct trace_array *irqsoff_trace __read_mostly;
23172 static int tracer_enabled __read_mostly;
23173@@ -438,11 +439,13 @@ void start_critical_timings(void)
23174 {
23175 if (preempt_trace() || irq_trace())
23176 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
23177+ trace_preemptirqsoff_hist(TRACE_START, 1);
23178 }
23179 EXPORT_SYMBOL_GPL(start_critical_timings);
23180
23181 void stop_critical_timings(void)
23182 {
23183+ trace_preemptirqsoff_hist(TRACE_STOP, 0);
23184 if (preempt_trace() || irq_trace())
23185 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
23186 }
23187@@ -452,6 +455,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
23188 #ifdef CONFIG_PROVE_LOCKING
23189 void time_hardirqs_on(unsigned long a0, unsigned long a1)
23190 {
23191+ trace_preemptirqsoff_hist(IRQS_ON, 0);
23192 if (!preempt_trace() && irq_trace())
23193 stop_critical_timing(a0, a1);
23194 }
23195@@ -460,6 +464,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
23196 {
23197 if (!preempt_trace() && irq_trace())
23198 start_critical_timing(a0, a1);
23199+ trace_preemptirqsoff_hist(IRQS_OFF, 1);
23200 }
23201
23202 #else /* !CONFIG_PROVE_LOCKING */
23203@@ -485,6 +490,7 @@ inline void print_irqtrace_events(struct task_struct *curr)
23204 */
23205 void trace_hardirqs_on(void)
23206 {
23207+ trace_preemptirqsoff_hist(IRQS_ON, 0);
23208 if (!preempt_trace() && irq_trace())
23209 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
23210 }
23211@@ -494,11 +500,13 @@ void trace_hardirqs_off(void)
23212 {
23213 if (!preempt_trace() && irq_trace())
23214 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
23215+ trace_preemptirqsoff_hist(IRQS_OFF, 1);
23216 }
23217 EXPORT_SYMBOL(trace_hardirqs_off);
23218
23219 void trace_hardirqs_on_caller(unsigned long caller_addr)
23220 {
23221+ trace_preemptirqsoff_hist(IRQS_ON, 0);
23222 if (!preempt_trace() && irq_trace())
23223 stop_critical_timing(CALLER_ADDR0, caller_addr);
23224 }
23225@@ -508,6 +516,7 @@ void trace_hardirqs_off_caller(unsigned long caller_addr)
23226 {
23227 if (!preempt_trace() && irq_trace())
23228 start_critical_timing(CALLER_ADDR0, caller_addr);
23229+ trace_preemptirqsoff_hist(IRQS_OFF, 1);
23230 }
23231 EXPORT_SYMBOL(trace_hardirqs_off_caller);
23232
23233@@ -517,12 +526,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
23234 #ifdef CONFIG_PREEMPT_TRACER
23235 void trace_preempt_on(unsigned long a0, unsigned long a1)
23236 {
23237+ trace_preemptirqsoff_hist(PREEMPT_ON, 0);
23238 if (preempt_trace() && !irq_trace())
23239 stop_critical_timing(a0, a1);
23240 }
23241
23242 void trace_preempt_off(unsigned long a0, unsigned long a1)
23243 {
23244+ trace_preemptirqsoff_hist(PREEMPT_ON, 1);
23245 if (preempt_trace() && !irq_trace())
23246 start_critical_timing(a0, a1);
23247 }
23248diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
23249index 194d796..2b0aea4 100644
23250--- a/kernel/trace/trace_output.c
23251+++ b/kernel/trace/trace_output.c
23252@@ -564,6 +564,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
23253 {
23254 char hardsoft_irq;
23255 char need_resched;
23256+ char need_resched_lazy;
23257 char irqs_off;
23258 int hardirq;
23259 int softirq;
23260@@ -578,14 +579,17 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
23261 '.';
23262 need_resched =
23263 (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
23264+ need_resched_lazy =
23265+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
23266 hardsoft_irq =
23267 (hardirq && softirq) ? 'H' :
23268 hardirq ? 'h' :
23269 softirq ? 's' :
23270 '.';
23271
23272- if (!trace_seq_printf(s, "%c%c%c",
23273- irqs_off, need_resched, hardsoft_irq))
23274+ if (!trace_seq_printf(s, "%c%c%c%c",
23275+ irqs_off, need_resched, need_resched_lazy,
23276+ hardsoft_irq))
23277 return 0;
23278
23279 if (entry->preempt_count)
23280@@ -593,6 +597,16 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
23281 else
23282 ret = trace_seq_putc(s, '.');
23283
23284+ if (entry->preempt_lazy_count)
23285+ ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count);
23286+ else
23287+ ret = trace_seq_putc(s, '.');
23288+
23289+ if (entry->migrate_disable)
23290+ ret = trace_seq_printf(s, "%x", entry->migrate_disable);
23291+ else
23292+ ret = trace_seq_putc(s, '.');
23293+
23294 return ret;
23295 }
23296
23297diff --git a/kernel/user.c b/kernel/user.c
23298index 7f6ff2b..68b70d7 100644
23299--- a/kernel/user.c
23300+++ b/kernel/user.c
23301@@ -159,11 +159,11 @@ void free_uid(struct user_struct *up)
23302 if (!up)
23303 return;
23304
23305- local_irq_save(flags);
23306+ local_irq_save_nort(flags);
23307 if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
23308 free_user(up, flags);
23309 else
23310- local_irq_restore(flags);
23311+ local_irq_restore_nort(flags);
23312 }
23313
23314 struct user_struct *alloc_uid(kuid_t uid)
23315diff --git a/kernel/wait-simple.c b/kernel/wait-simple.c
23316new file mode 100644
23317index 0000000..4b9a0b5
23318--- /dev/null
23319+++ b/kernel/wait-simple.c
23320@@ -0,0 +1,119 @@
23321+/*
23322+ * Simple waitqueues without fancy flags and callbacks
23323+ *
23324+ * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
23325+ *
23326+ * Based on kernel/wait.c
23327+ *
23328+ * For licencing details see kernel-base/COPYING
23329+ */
23330+#include <linux/init.h>
23331+#include <linux/export.h>
23332+#include <linux/sched.h>
23333+#include <linux/wait-simple.h>
23334+
23335+/* Adds w to head->list. Must be called with head->lock locked. */
23336+static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
23337+{
23338+ list_add(&w->node, &head->list);
23339+}
23340+
23341+/* Removes w from head->list. Must be called with head->lock locked. */
23342+static inline void __swait_dequeue(struct swaiter *w)
23343+{
23344+ list_del_init(&w->node);
23345+}
23346+
23347+/* Check whether a head has waiters enqueued */
23348+static inline bool swait_head_has_waiters(struct swait_head *h)
23349+{
23350+ return !list_empty(&h->list);
23351+}
23352+
23353+void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
23354+{
23355+ raw_spin_lock_init(&head->lock);
23356+ lockdep_set_class(&head->lock, key);
23357+ INIT_LIST_HEAD(&head->list);
23358+}
23359+EXPORT_SYMBOL(__init_swait_head);
23360+
23361+void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
23362+{
23363+ w->task = current;
23364+ if (list_empty(&w->node))
23365+ __swait_enqueue(head, w);
23366+}
23367+
23368+void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
23369+{
23370+ unsigned long flags;
23371+
23372+ raw_spin_lock_irqsave(&head->lock, flags);
23373+ swait_prepare_locked(head, w);
23374+ __set_current_state(state);
23375+ raw_spin_unlock_irqrestore(&head->lock, flags);
23376+}
23377+EXPORT_SYMBOL(swait_prepare);
23378+
23379+void swait_finish_locked(struct swait_head *head, struct swaiter *w)
23380+{
23381+ __set_current_state(TASK_RUNNING);
23382+ if (w->task)
23383+ __swait_dequeue(w);
23384+}
23385+
23386+void swait_finish(struct swait_head *head, struct swaiter *w)
23387+{
23388+ unsigned long flags;
23389+
23390+ __set_current_state(TASK_RUNNING);
23391+ if (w->task) {
23392+ raw_spin_lock_irqsave(&head->lock, flags);
23393+ __swait_dequeue(w);
23394+ raw_spin_unlock_irqrestore(&head->lock, flags);
23395+ }
23396+}
23397+EXPORT_SYMBOL(swait_finish);
23398+
23399+unsigned int
23400+__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
23401+{
23402+ struct swaiter *curr, *next;
23403+ int woken = 0;
23404+
23405+ list_for_each_entry_safe(curr, next, &head->list, node) {
23406+ if (wake_up_state(curr->task, state)) {
23407+ __swait_dequeue(curr);
23408+ /*
23409+ * The waiting task can free the waiter as
23410+ * soon as curr->task = NULL is written,
23411+ * without taking any locks. A memory barrier
23412+ * is required here to prevent the following
23413+ * store to curr->task from getting ahead of
23414+ * the dequeue operation.
23415+ */
23416+ smp_wmb();
23417+ curr->task = NULL;
23418+ if (++woken == num)
23419+ break;
23420+ }
23421+ }
23422+ return woken;
23423+}
23424+
23425+unsigned int
23426+__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
23427+{
23428+ unsigned long flags;
23429+ int woken;
23430+
23431+ if (!swait_head_has_waiters(head))
23432+ return 0;
23433+
23434+ raw_spin_lock_irqsave(&head->lock, flags);
23435+ woken = __swait_wake_locked(head, state, num);
23436+ raw_spin_unlock_irqrestore(&head->lock, flags);
23437+ return woken;
23438+}
23439+EXPORT_SYMBOL(__swait_wake);
23440diff --git a/kernel/watchdog.c b/kernel/watchdog.c
23441index 75a2ab3..7bbc18a 100644
23442--- a/kernel/watchdog.c
23443+++ b/kernel/watchdog.c
23444@@ -206,6 +206,8 @@ static int is_softlockup(unsigned long touch_ts)
23445
23446 #ifdef CONFIG_HARDLOCKUP_DETECTOR
23447
23448+static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
23449+
23450 static struct perf_event_attr wd_hw_attr = {
23451 .type = PERF_TYPE_HARDWARE,
23452 .config = PERF_COUNT_HW_CPU_CYCLES,
23453@@ -240,10 +242,19 @@ static void watchdog_overflow_callback(struct perf_event *event,
23454 if (__this_cpu_read(hard_watchdog_warn) == true)
23455 return;
23456
23457- if (hardlockup_panic)
23458+ /*
23459+ * If early-printk is enabled then make sure we do not
23460+ * lock up in printk() and kill console logging:
23461+ */
23462+ printk_kill();
23463+
23464+ if (hardlockup_panic) {
23465 panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
23466- else
23467+ } else {
23468+ raw_spin_lock(&watchdog_output_lock);
23469 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
23470+ raw_spin_unlock(&watchdog_output_lock);
23471+ }
23472
23473 __this_cpu_write(hard_watchdog_warn, true);
23474 return;
23475@@ -347,6 +358,7 @@ static void watchdog_enable(unsigned int cpu)
23476 /* kick off the timer for the hardlockup detector */
23477 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
23478 hrtimer->function = watchdog_timer_fn;
23479+ hrtimer->irqsafe = 1;
23480
23481 if (!watchdog_enabled) {
23482 kthread_park(current);
23483diff --git a/kernel/workqueue.c b/kernel/workqueue.c
23484index 3a3a98f..11285e4 100644
23485--- a/kernel/workqueue.c
23486+++ b/kernel/workqueue.c
23487@@ -41,6 +41,7 @@
23488 #include <linux/debug_locks.h>
23489 #include <linux/lockdep.h>
23490 #include <linux/idr.h>
23491+#include <linux/locallock.h>
23492
23493 #include "workqueue_sched.h"
23494
23495@@ -278,6 +279,8 @@ EXPORT_SYMBOL_GPL(system_unbound_wq);
23496 struct workqueue_struct *system_freezable_wq __read_mostly;
23497 EXPORT_SYMBOL_GPL(system_freezable_wq);
23498
23499+static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
23500+
23501 #define CREATE_TRACE_POINTS
23502 #include <trace/events/workqueue.h>
23503
23504@@ -1092,7 +1095,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
23505 {
23506 struct global_cwq *gcwq;
23507
23508- local_irq_save(*flags);
23509+ local_lock_irqsave(pendingb_lock, *flags);
23510
23511 /* try to steal the timer if it exists */
23512 if (is_dwork) {
23513@@ -1151,7 +1154,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
23514 }
23515 spin_unlock(&gcwq->lock);
23516 fail:
23517- local_irq_restore(*flags);
23518+ local_unlock_irqrestore(pendingb_lock, *flags);
23519 if (work_is_canceling(work))
23520 return -ENOENT;
23521 cpu_relax();
23522@@ -1246,7 +1249,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
23523 * queued or lose PENDING. Grabbing PENDING and queueing should
23524 * happen with IRQ disabled.
23525 */
23526- WARN_ON_ONCE(!irqs_disabled());
23527+ WARN_ON_ONCE_NONRT(!irqs_disabled());
23528
23529 debug_work_activate(work);
23530
23531@@ -1336,14 +1339,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
23532 bool ret = false;
23533 unsigned long flags;
23534
23535- local_irq_save(flags);
23536+ local_lock_irqsave(pendingb_lock,flags);
23537
23538 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
23539 __queue_work(cpu, wq, work);
23540 ret = true;
23541 }
23542
23543- local_irq_restore(flags);
23544+ local_unlock_irqrestore(pendingb_lock, flags);
23545 return ret;
23546 }
23547 EXPORT_SYMBOL_GPL(queue_work_on);
23548@@ -1451,14 +1454,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
23549 unsigned long flags;
23550
23551 /* read the comment in __queue_work() */
23552- local_irq_save(flags);
23553+ local_lock_irqsave(pendingb_lock, flags);
23554
23555 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
23556 __queue_delayed_work(cpu, wq, dwork, delay);
23557 ret = true;
23558 }
23559
23560- local_irq_restore(flags);
23561+ local_unlock_irqrestore(pendingb_lock, flags);
23562 return ret;
23563 }
23564 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
23565@@ -1508,7 +1511,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
23566
23567 if (likely(ret >= 0)) {
23568 __queue_delayed_work(cpu, wq, dwork, delay);
23569- local_irq_restore(flags);
23570+ local_unlock_irqrestore(pendingb_lock, flags);
23571 }
23572
23573 /* -ENOENT from try_to_grab_pending() becomes %true */
23574@@ -2936,7 +2939,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
23575
23576 /* tell other tasks trying to grab @work to back off */
23577 mark_work_canceling(work);
23578- local_irq_restore(flags);
23579+ local_unlock_irqrestore(pendingb_lock, flags);
23580
23581 flush_work(work);
23582 clear_work_data(work);
23583@@ -2981,11 +2984,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
23584 */
23585 bool flush_delayed_work(struct delayed_work *dwork)
23586 {
23587- local_irq_disable();
23588+ local_lock_irq(pendingb_lock);
23589 if (del_timer_sync(&dwork->timer))
23590 __queue_work(dwork->cpu,
23591 get_work_cwq(&dwork->work)->wq, &dwork->work);
23592- local_irq_enable();
23593+ local_unlock_irq(pendingb_lock);
23594 return flush_work(&dwork->work);
23595 }
23596 EXPORT_SYMBOL(flush_delayed_work);
23597@@ -3015,7 +3018,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
23598 return false;
23599
23600 set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
23601- local_irq_restore(flags);
23602+ local_unlock_irqrestore(pendingb_lock, flags);
23603 return ret;
23604 }
23605 EXPORT_SYMBOL(cancel_delayed_work);
23606diff --git a/lib/Kconfig b/lib/Kconfig
23607index 75cdb77..7669d65 100644
23608--- a/lib/Kconfig
23609+++ b/lib/Kconfig
23610@@ -315,6 +315,7 @@ config CHECK_SIGNATURE
23611
23612 config CPUMASK_OFFSTACK
23613 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
23614+ depends on !PREEMPT_RT_FULL
23615 help
23616 Use dynamic allocation for cpumask_var_t, instead of putting
23617 them on the stack. This is a bit more expensive, but avoids
23618diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
23619index 67604e5..24b60ba 100644
23620--- a/lib/Kconfig.debug
23621+++ b/lib/Kconfig.debug
23622@@ -164,7 +164,7 @@ config DEBUG_KERNEL
23623
23624 config DEBUG_SHIRQ
23625 bool "Debug shared IRQ handlers"
23626- depends on DEBUG_KERNEL && GENERIC_HARDIRQS
23627+ depends on DEBUG_KERNEL && GENERIC_HARDIRQS && !PREEMPT_RT_BASE
23628 help
23629 Enable this to generate a spurious interrupt as soon as a shared
23630 interrupt handler is registered, and just before one is deregistered.
23631diff --git a/lib/Makefile b/lib/Makefile
23632index 02ed6c0..7e961f1 100644
23633--- a/lib/Makefile
23634+++ b/lib/Makefile
23635@@ -38,8 +38,11 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
23636 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
23637 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
23638 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
23639+
23640+ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
23641 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
23642 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
23643+endif
23644 lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
23645
23646 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
23647diff --git a/lib/debugobjects.c b/lib/debugobjects.c
23648index d11808c..cf5f02f 100644
23649--- a/lib/debugobjects.c
23650+++ b/lib/debugobjects.c
23651@@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
23652 struct debug_obj *obj;
23653 unsigned long flags;
23654
23655- fill_pool();
23656+#ifdef CONFIG_PREEMPT_RT_FULL
23657+ if (preempt_count() == 0 && !irqs_disabled())
23658+#endif
23659+ fill_pool();
23660
23661 db = get_bucket((unsigned long) addr);
23662
23663diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
23664index 7aae0f2..23b8564 100644
23665--- a/lib/locking-selftest.c
23666+++ b/lib/locking-selftest.c
23667@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
23668 * Normal standalone locks, for the circular and irq-context
23669 * dependency tests:
23670 */
23671-static DEFINE_SPINLOCK(lock_A);
23672-static DEFINE_SPINLOCK(lock_B);
23673-static DEFINE_SPINLOCK(lock_C);
23674-static DEFINE_SPINLOCK(lock_D);
23675+static DEFINE_RAW_SPINLOCK(lock_A);
23676+static DEFINE_RAW_SPINLOCK(lock_B);
23677+static DEFINE_RAW_SPINLOCK(lock_C);
23678+static DEFINE_RAW_SPINLOCK(lock_D);
23679
23680 static DEFINE_RWLOCK(rwlock_A);
23681 static DEFINE_RWLOCK(rwlock_B);
23682@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
23683 * but X* and Y* are different classes. We do this so that
23684 * we do not trigger a real lockup:
23685 */
23686-static DEFINE_SPINLOCK(lock_X1);
23687-static DEFINE_SPINLOCK(lock_X2);
23688-static DEFINE_SPINLOCK(lock_Y1);
23689-static DEFINE_SPINLOCK(lock_Y2);
23690-static DEFINE_SPINLOCK(lock_Z1);
23691-static DEFINE_SPINLOCK(lock_Z2);
23692+static DEFINE_RAW_SPINLOCK(lock_X1);
23693+static DEFINE_RAW_SPINLOCK(lock_X2);
23694+static DEFINE_RAW_SPINLOCK(lock_Y1);
23695+static DEFINE_RAW_SPINLOCK(lock_Y2);
23696+static DEFINE_RAW_SPINLOCK(lock_Z1);
23697+static DEFINE_RAW_SPINLOCK(lock_Z2);
23698
23699 static DEFINE_RWLOCK(rwlock_X1);
23700 static DEFINE_RWLOCK(rwlock_X2);
23701@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
23702 */
23703 #define INIT_CLASS_FUNC(class) \
23704 static noinline void \
23705-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
23706- struct rw_semaphore *rwsem) \
23707+init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
23708+ struct mutex *mutex, struct rw_semaphore *rwsem)\
23709 { \
23710- spin_lock_init(lock); \
23711+ raw_spin_lock_init(lock); \
23712 rwlock_init(rwlock); \
23713 mutex_init(mutex); \
23714 init_rwsem(rwsem); \
23715@@ -168,10 +168,10 @@ static void init_shared_classes(void)
23716 * Shortcuts for lock/unlock API variants, to keep
23717 * the testcases compact:
23718 */
23719-#define L(x) spin_lock(&lock_##x)
23720-#define U(x) spin_unlock(&lock_##x)
23721+#define L(x) raw_spin_lock(&lock_##x)
23722+#define U(x) raw_spin_unlock(&lock_##x)
23723 #define LU(x) L(x); U(x)
23724-#define SI(x) spin_lock_init(&lock_##x)
23725+#define SI(x) raw_spin_lock_init(&lock_##x)
23726
23727 #define WL(x) write_lock(&rwlock_##x)
23728 #define WU(x) write_unlock(&rwlock_##x)
23729@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
23730
23731 #define I2(x) \
23732 do { \
23733- spin_lock_init(&lock_##x); \
23734+ raw_spin_lock_init(&lock_##x); \
23735 rwlock_init(&rwlock_##x); \
23736 mutex_init(&mutex_##x); \
23737 init_rwsem(&rwsem_##x); \
23738@@ -1175,6 +1175,7 @@ void locking_selftest(void)
23739
23740 printk(" --------------------------------------------------------------------------\n");
23741
23742+#ifndef CONFIG_PREEMPT_RT_FULL
23743 /*
23744 * irq-context testcases:
23745 */
23746@@ -1187,6 +1188,28 @@ void locking_selftest(void)
23747
23748 DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
23749 // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
23750+#else
23751+ /* On -rt, we only do hardirq context test for raw spinlock */
23752+ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
23753+ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
23754+
23755+ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
23756+ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
23757+
23758+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
23759+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
23760+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
23761+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
23762+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
23763+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
23764+
23765+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
23766+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
23767+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
23768+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
23769+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
23770+ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
23771+#endif
23772
23773 if (unexpected_testcase_failures) {
23774 printk("-----------------------------------------------------------------\n");
23775diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
23776index 652a8ee..2db0f42 100644
23777--- a/lib/percpu-rwsem.c
23778+++ b/lib/percpu-rwsem.c
23779@@ -84,8 +84,12 @@ void percpu_down_read(struct percpu_rw_semaphore *brw)
23780
23781 down_read(&brw->rw_sem);
23782 atomic_inc(&brw->slow_read_ctr);
23783+#ifdef CONFIG_PREEMPT_RT_FULL
23784+ up_read(&brw->rw_sem);
23785+#else
23786 /* avoid up_read()->rwsem_release() */
23787 __up_read(&brw->rw_sem);
23788+#endif
23789 }
23790
23791 void percpu_up_read(struct percpu_rw_semaphore *brw)
23792diff --git a/lib/radix-tree.c b/lib/radix-tree.c
23793index e796429..63bac7d 100644
23794--- a/lib/radix-tree.c
23795+++ b/lib/radix-tree.c
23796@@ -215,12 +215,13 @@ radix_tree_node_alloc(struct radix_tree_root *root)
23797 * succeed in getting a node here (and never reach
23798 * kmem_cache_alloc)
23799 */
23800- rtp = &__get_cpu_var(radix_tree_preloads);
23801+ rtp = &get_cpu_var(radix_tree_preloads);
23802 if (rtp->nr) {
23803 ret = rtp->nodes[rtp->nr - 1];
23804 rtp->nodes[rtp->nr - 1] = NULL;
23805 rtp->nr--;
23806 }
23807+ put_cpu_var(radix_tree_preloads);
23808 }
23809 if (ret == NULL)
23810 ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
23811@@ -255,6 +256,7 @@ radix_tree_node_free(struct radix_tree_node *node)
23812 call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
23813 }
23814
23815+#ifndef CONFIG_PREEMPT_RT_FULL
23816 /*
23817 * Load up this CPU's radix_tree_node buffer with sufficient objects to
23818 * ensure that the addition of a single element in the tree cannot fail. On
23819@@ -289,6 +291,7 @@ out:
23820 return ret;
23821 }
23822 EXPORT_SYMBOL(radix_tree_preload);
23823+#endif
23824
23825 /*
23826 * Return the maximum key which can be store into a
23827diff --git a/lib/scatterlist.c b/lib/scatterlist.c
23828index 7874b01..43603ee 100644
23829--- a/lib/scatterlist.c
23830+++ b/lib/scatterlist.c
23831@@ -499,7 +499,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
23832 flush_kernel_dcache_page(miter->page);
23833
23834 if (miter->__flags & SG_MITER_ATOMIC) {
23835- WARN_ON_ONCE(preemptible());
23836+ WARN_ON_ONCE(!pagefault_disabled());
23837 kunmap_atomic(miter->addr);
23838 } else
23839 kunmap(miter->page);
23840@@ -539,7 +539,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
23841
23842 sg_miter_start(&miter, sgl, nents, sg_flags);
23843
23844- local_irq_save(flags);
23845+ local_irq_save_nort(flags);
23846
23847 while (sg_miter_next(&miter) && offset < buflen) {
23848 unsigned int len;
23849@@ -556,7 +556,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
23850
23851 sg_miter_stop(&miter);
23852
23853- local_irq_restore(flags);
23854+ local_irq_restore_nort(flags);
23855 return offset;
23856 }
23857
23858diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
23859index 4c0d0e5..dbb1570 100644
23860--- a/lib/smp_processor_id.c
23861+++ b/lib/smp_processor_id.c
23862@@ -39,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void)
23863 if (!printk_ratelimit())
23864 goto out_enable;
23865
23866- printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] "
23867- "code: %s/%d\n",
23868- preempt_count() - 1, current->comm, current->pid);
23869+ printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x %08x] "
23870+ "code: %s/%d\n", preempt_count() - 1,
23871+ __migrate_disabled(current), current->comm, current->pid);
23872 print_symbol("caller is %s\n", (long)__builtin_return_address(0));
23873 dump_stack();
23874
23875diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
23876index 0374a59..9497033 100644
23877--- a/lib/spinlock_debug.c
23878+++ b/lib/spinlock_debug.c
23879@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
23880
23881 EXPORT_SYMBOL(__raw_spin_lock_init);
23882
23883+#ifndef CONFIG_PREEMPT_RT_FULL
23884 void __rwlock_init(rwlock_t *lock, const char *name,
23885 struct lock_class_key *key)
23886 {
23887@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
23888 }
23889
23890 EXPORT_SYMBOL(__rwlock_init);
23891+#endif
23892
23893 static void spin_dump(raw_spinlock_t *lock, const char *msg)
23894 {
23895@@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock)
23896 arch_spin_unlock(&lock->raw_lock);
23897 }
23898
23899+#ifndef CONFIG_PREEMPT_RT_FULL
23900 static void rwlock_bug(rwlock_t *lock, const char *msg)
23901 {
23902 if (!debug_locks_off())
23903@@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock)
23904 debug_write_unlock(lock);
23905 arch_write_unlock(&lock->raw_lock);
23906 }
23907+
23908+#endif
23909diff --git a/localversion-rt b/localversion-rt
23910new file mode 100644
23911index 0000000..700c857
23912--- /dev/null
23913+++ b/localversion-rt
23914@@ -0,0 +1 @@
23915+-rt8
23916diff --git a/mm/Kconfig b/mm/Kconfig
23917index 278e3ab..c6ceefc 100644
23918--- a/mm/Kconfig
23919+++ b/mm/Kconfig
23920@@ -353,7 +353,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
23921
23922 config TRANSPARENT_HUGEPAGE
23923 bool "Transparent Hugepage Support"
23924- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
23925+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
23926 select COMPACTION
23927 help
23928 Transparent Hugepages allows the kernel to use huge pages and
23929diff --git a/mm/bounce.c b/mm/bounce.c
23930index 0420867..1e78ef7 100644
23931--- a/mm/bounce.c
23932+++ b/mm/bounce.c
23933@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
23934 unsigned long flags;
23935 unsigned char *vto;
23936
23937- local_irq_save(flags);
23938+ local_irq_save_nort(flags);
23939 vto = kmap_atomic(to->bv_page);
23940 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
23941 kunmap_atomic(vto);
23942- local_irq_restore(flags);
23943+ local_irq_restore_nort(flags);
23944 }
23945
23946 #else /* CONFIG_HIGHMEM */
23947diff --git a/mm/filemap.c b/mm/filemap.c
23948index 83efee7..cb81968 100644
23949--- a/mm/filemap.c
23950+++ b/mm/filemap.c
23951@@ -1955,7 +1955,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
23952 char *kaddr;
23953 size_t copied;
23954
23955- BUG_ON(!in_atomic());
23956+ BUG_ON(!pagefault_disabled());
23957 kaddr = kmap_atomic(page);
23958 if (likely(i->nr_segs == 1)) {
23959 int left;
23960diff --git a/mm/highmem.c b/mm/highmem.c
23961index b32b70c..b1c7d43 100644
23962--- a/mm/highmem.c
23963+++ b/mm/highmem.c
23964@@ -29,10 +29,11 @@
23965 #include <linux/kgdb.h>
23966 #include <asm/tlbflush.h>
23967
23968-
23969+#ifndef CONFIG_PREEMPT_RT_FULL
23970 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
23971 DEFINE_PER_CPU(int, __kmap_atomic_idx);
23972 #endif
23973+#endif
23974
23975 /*
23976 * Virtual_count is not a pure "count".
23977@@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
23978 unsigned long totalhigh_pages __read_mostly;
23979 EXPORT_SYMBOL(totalhigh_pages);
23980
23981-
23982+#ifndef CONFIG_PREEMPT_RT_FULL
23983 EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
23984+#endif
23985
23986 unsigned int nr_free_highpages (void)
23987 {
23988diff --git a/mm/memory.c b/mm/memory.c
23989index 32a495a..23b82ee 100644
23990--- a/mm/memory.c
23991+++ b/mm/memory.c
23992@@ -3717,6 +3717,32 @@ unlock:
23993 return 0;
23994 }
23995
23996+#ifdef CONFIG_PREEMPT_RT_FULL
23997+void pagefault_disable(void)
23998+{
23999+ migrate_disable();
24000+ current->pagefault_disabled++;
24001+ /*
24002+ * make sure to have issued the store before a pagefault
24003+ * can hit.
24004+ */
24005+ barrier();
24006+}
24007+EXPORT_SYMBOL(pagefault_disable);
24008+
24009+void pagefault_enable(void)
24010+{
24011+ /*
24012+ * make sure to issue those last loads/stores before enabling
24013+ * the pagefault handler again.
24014+ */
24015+ barrier();
24016+ current->pagefault_disabled--;
24017+ migrate_enable();
24018+}
24019+EXPORT_SYMBOL(pagefault_enable);
24020+#endif
24021+
24022 /*
24023 * By the time we get here, we already hold the mm semaphore
24024 */
24025@@ -4288,3 +4314,35 @@ void copy_user_huge_page(struct page *dst, struct page *src,
24026 }
24027 }
24028 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
24029+
24030+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
24031+/*
24032+ * Heinous hack, relies on the caller doing something like:
24033+ *
24034+ * pte = alloc_pages(PGALLOC_GFP, 0);
24035+ * if (pte)
24036+ * pgtable_page_ctor(pte);
24037+ * return pte;
24038+ *
24039+ * This ensures we release the page and return NULL when the
24040+ * lock allocation fails.
24041+ */
24042+struct page *pte_lock_init(struct page *page)
24043+{
24044+ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
24045+ if (page->ptl) {
24046+ spin_lock_init(__pte_lockptr(page));
24047+ } else {
24048+ __free_page(page);
24049+ page = NULL;
24050+ }
24051+ return page;
24052+}
24053+
24054+void pte_lock_deinit(struct page *page)
24055+{
24056+ kfree(page->ptl);
24057+ page->mapping = NULL;
24058+}
24059+
24060+#endif
24061diff --git a/mm/mmu_context.c b/mm/mmu_context.c
24062index 3dcfaf4..1385e48 100644
24063--- a/mm/mmu_context.c
24064+++ b/mm/mmu_context.c
24065@@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm)
24066 struct task_struct *tsk = current;
24067
24068 task_lock(tsk);
24069+ preempt_disable_rt();
24070 active_mm = tsk->active_mm;
24071 if (active_mm != mm) {
24072 atomic_inc(&mm->mm_count);
24073@@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm)
24074 }
24075 tsk->mm = mm;
24076 switch_mm(active_mm, mm, tsk);
24077+ preempt_enable_rt();
24078 task_unlock(tsk);
24079
24080 if (active_mm != mm)
24081diff --git a/mm/page_alloc.c b/mm/page_alloc.c
24082index 6a83cd3..1734913 100644
24083--- a/mm/page_alloc.c
24084+++ b/mm/page_alloc.c
24085@@ -58,6 +58,7 @@
24086 #include <linux/prefetch.h>
24087 #include <linux/migrate.h>
24088 #include <linux/page-debug-flags.h>
24089+#include <linux/locallock.h>
24090
24091 #include <asm/tlbflush.h>
24092 #include <asm/div64.h>
24093@@ -219,6 +220,18 @@ EXPORT_SYMBOL(nr_node_ids);
24094 EXPORT_SYMBOL(nr_online_nodes);
24095 #endif
24096
24097+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
24098+
24099+#ifdef CONFIG_PREEMPT_RT_BASE
24100+# define cpu_lock_irqsave(cpu, flags) \
24101+ local_lock_irqsave_on(pa_lock, flags, cpu)
24102+# define cpu_unlock_irqrestore(cpu, flags) \
24103+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
24104+#else
24105+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
24106+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
24107+#endif
24108+
24109 int page_group_by_mobility_disabled __read_mostly;
24110
24111 void set_pageblock_migratetype(struct page *page, int migratetype)
24112@@ -612,7 +625,7 @@ static inline int free_pages_check(struct page *page)
24113 }
24114
24115 /*
24116- * Frees a number of pages from the PCP lists
24117+ * Frees a number of pages which have been collected from the pcp lists.
24118 * Assumes all pages on list are in same zone, and of same order.
24119 * count is the number of pages to free.
24120 *
24121@@ -623,16 +636,50 @@ static inline int free_pages_check(struct page *page)
24122 * pinned" detection logic.
24123 */
24124 static void free_pcppages_bulk(struct zone *zone, int count,
24125- struct per_cpu_pages *pcp)
24126+ struct list_head *list)
24127 {
24128- int migratetype = 0;
24129- int batch_free = 0;
24130 int to_free = count;
24131+ unsigned long flags;
24132
24133- spin_lock(&zone->lock);
24134+ spin_lock_irqsave(&zone->lock, flags);
24135 zone->all_unreclaimable = 0;
24136 zone->pages_scanned = 0;
24137
24138+ while (!list_empty(list)) {
24139+ struct page *page = list_first_entry(list, struct page, lru);
24140+ int mt; /* migratetype of the to-be-freed page */
24141+
24142+ /* must delete as __free_one_page list manipulates */
24143+ list_del(&page->lru);
24144+
24145+ mt = get_freepage_migratetype(page);
24146+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
24147+ __free_one_page(page, zone, 0, mt);
24148+ trace_mm_page_pcpu_drain(page, 0, mt);
24149+ if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
24150+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
24151+ if (is_migrate_cma(mt))
24152+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
24153+ }
24154+
24155+ to_free--;
24156+ }
24157+ WARN_ON(to_free != 0);
24158+ spin_unlock_irqrestore(&zone->lock, flags);
24159+}
24160+
24161+/*
24162+ * Moves a number of pages from the PCP lists to free list which
24163+ * is freed outside of the locked region.
24164+ *
24165+ * Assumes all pages on list are in same zone, and of same order.
24166+ * count is the number of pages to free.
24167+ */
24168+static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
24169+ struct list_head *dst)
24170+{
24171+ int migratetype = 0, batch_free = 0;
24172+
24173 while (to_free) {
24174 struct page *page;
24175 struct list_head *list;
24176@@ -648,7 +695,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
24177 batch_free++;
24178 if (++migratetype == MIGRATE_PCPTYPES)
24179 migratetype = 0;
24180- list = &pcp->lists[migratetype];
24181+ list = &src->lists[migratetype];
24182 } while (list_empty(list));
24183
24184 /* This is the only non-empty list. Free them all. */
24185@@ -656,36 +703,26 @@ static void free_pcppages_bulk(struct zone *zone, int count,
24186 batch_free = to_free;
24187
24188 do {
24189- int mt; /* migratetype of the to-be-freed page */
24190-
24191- page = list_entry(list->prev, struct page, lru);
24192- /* must delete as __free_one_page list manipulates */
24193+ page = list_last_entry(list, struct page, lru);
24194 list_del(&page->lru);
24195- mt = get_freepage_migratetype(page);
24196- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
24197- __free_one_page(page, zone, 0, mt);
24198- trace_mm_page_pcpu_drain(page, 0, mt);
24199- if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
24200- __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
24201- if (is_migrate_cma(mt))
24202- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
24203- }
24204+ list_add(&page->lru, dst);
24205 } while (--to_free && --batch_free && !list_empty(list));
24206 }
24207- spin_unlock(&zone->lock);
24208 }
24209
24210 static void free_one_page(struct zone *zone, struct page *page, int order,
24211 int migratetype)
24212 {
24213- spin_lock(&zone->lock);
24214+ unsigned long flags;
24215+
24216+ spin_lock_irqsave(&zone->lock, flags);
24217 zone->all_unreclaimable = 0;
24218 zone->pages_scanned = 0;
24219
24220 __free_one_page(page, zone, order, migratetype);
24221 if (unlikely(migratetype != MIGRATE_ISOLATE))
24222 __mod_zone_freepage_state(zone, 1 << order, migratetype);
24223- spin_unlock(&zone->lock);
24224+ spin_unlock_irqrestore(&zone->lock, flags);
24225 }
24226
24227 static bool free_pages_prepare(struct page *page, unsigned int order)
24228@@ -722,12 +759,12 @@ static void __free_pages_ok(struct page *page, unsigned int order)
24229 if (!free_pages_prepare(page, order))
24230 return;
24231
24232- local_irq_save(flags);
24233+ local_lock_irqsave(pa_lock, flags);
24234 __count_vm_events(PGFREE, 1 << order);
24235 migratetype = get_pageblock_migratetype(page);
24236 set_freepage_migratetype(page, migratetype);
24237 free_one_page(page_zone(page), page, order, migratetype);
24238- local_irq_restore(flags);
24239+ local_unlock_irqrestore(pa_lock, flags);
24240 }
24241
24242 /*
24243@@ -1167,18 +1204,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
24244 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
24245 {
24246 unsigned long flags;
24247+ LIST_HEAD(dst);
24248 int to_drain;
24249
24250- local_irq_save(flags);
24251+ local_lock_irqsave(pa_lock, flags);
24252 if (pcp->count >= pcp->batch)
24253 to_drain = pcp->batch;
24254 else
24255 to_drain = pcp->count;
24256 if (to_drain > 0) {
24257- free_pcppages_bulk(zone, to_drain, pcp);
24258+ isolate_pcp_pages(to_drain, pcp, &dst);
24259 pcp->count -= to_drain;
24260 }
24261- local_irq_restore(flags);
24262+ local_unlock_irqrestore(pa_lock, flags);
24263+ free_pcppages_bulk(zone, to_drain, &dst);
24264 }
24265 #endif
24266
24267@@ -1197,16 +1236,21 @@ static void drain_pages(unsigned int cpu)
24268 for_each_populated_zone(zone) {
24269 struct per_cpu_pageset *pset;
24270 struct per_cpu_pages *pcp;
24271+ LIST_HEAD(dst);
24272+ int count;
24273
24274- local_irq_save(flags);
24275+ cpu_lock_irqsave(cpu, flags);
24276 pset = per_cpu_ptr(zone->pageset, cpu);
24277
24278 pcp = &pset->pcp;
24279- if (pcp->count) {
24280- free_pcppages_bulk(zone, pcp->count, pcp);
24281+ count = pcp->count;
24282+ if (count) {
24283+ isolate_pcp_pages(count, pcp, &dst);
24284 pcp->count = 0;
24285 }
24286- local_irq_restore(flags);
24287+ cpu_unlock_irqrestore(cpu, flags);
24288+ if (count)
24289+ free_pcppages_bulk(zone, count, &dst);
24290 }
24291 }
24292
24293@@ -1259,7 +1303,12 @@ void drain_all_pages(void)
24294 else
24295 cpumask_clear_cpu(cpu, &cpus_with_pcps);
24296 }
24297+#ifndef CONFIG_PREEMPT_RT_BASE
24298 on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
24299+#else
24300+ for_each_cpu(cpu, &cpus_with_pcps)
24301+ drain_pages(cpu);
24302+#endif
24303 }
24304
24305 #ifdef CONFIG_HIBERNATION
24306@@ -1314,7 +1363,7 @@ void free_hot_cold_page(struct page *page, int cold)
24307
24308 migratetype = get_pageblock_migratetype(page);
24309 set_freepage_migratetype(page, migratetype);
24310- local_irq_save(flags);
24311+ local_lock_irqsave(pa_lock, flags);
24312 __count_vm_event(PGFREE);
24313
24314 /*
24315@@ -1339,12 +1388,19 @@ void free_hot_cold_page(struct page *page, int cold)
24316 list_add(&page->lru, &pcp->lists[migratetype]);
24317 pcp->count++;
24318 if (pcp->count >= pcp->high) {
24319- free_pcppages_bulk(zone, pcp->batch, pcp);
24320+ LIST_HEAD(dst);
24321+ int count;
24322+
24323+ isolate_pcp_pages(pcp->batch, pcp, &dst);
24324 pcp->count -= pcp->batch;
24325+ count = pcp->batch;
24326+ local_unlock_irqrestore(pa_lock, flags);
24327+ free_pcppages_bulk(zone, count, &dst);
24328+ return;
24329 }
24330
24331 out:
24332- local_irq_restore(flags);
24333+ local_unlock_irqrestore(pa_lock, flags);
24334 }
24335
24336 /*
24337@@ -1473,7 +1529,7 @@ again:
24338 struct per_cpu_pages *pcp;
24339 struct list_head *list;
24340
24341- local_irq_save(flags);
24342+ local_lock_irqsave(pa_lock, flags);
24343 pcp = &this_cpu_ptr(zone->pageset)->pcp;
24344 list = &pcp->lists[migratetype];
24345 if (list_empty(list)) {
24346@@ -1505,18 +1561,20 @@ again:
24347 */
24348 WARN_ON_ONCE(order > 1);
24349 }
24350- spin_lock_irqsave(&zone->lock, flags);
24351+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
24352 page = __rmqueue(zone, order, migratetype);
24353- spin_unlock(&zone->lock);
24354- if (!page)
24355+ if (!page) {
24356+ spin_unlock(&zone->lock);
24357 goto failed;
24358+ }
24359 __mod_zone_freepage_state(zone, -(1 << order),
24360 get_pageblock_migratetype(page));
24361+ spin_unlock(&zone->lock);
24362 }
24363
24364 __count_zone_vm_events(PGALLOC, zone, 1 << order);
24365 zone_statistics(preferred_zone, zone, gfp_flags);
24366- local_irq_restore(flags);
24367+ local_unlock_irqrestore(pa_lock, flags);
24368
24369 VM_BUG_ON(bad_range(zone, page));
24370 if (prep_new_page(page, order, gfp_flags))
24371@@ -1524,7 +1582,7 @@ again:
24372 return page;
24373
24374 failed:
24375- local_irq_restore(flags);
24376+ local_unlock_irqrestore(pa_lock, flags);
24377 return NULL;
24378 }
24379
24380@@ -2146,8 +2204,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
24381 struct page *page;
24382
24383 /* Page migration frees to the PCP lists but we want merging */
24384- drain_pages(get_cpu());
24385- put_cpu();
24386+ drain_pages(get_cpu_light());
24387+ put_cpu_light();
24388
24389 page = get_page_from_freelist(gfp_mask, nodemask,
24390 order, zonelist, high_zoneidx,
24391@@ -5133,6 +5191,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
24392 void __init page_alloc_init(void)
24393 {
24394 hotcpu_notifier(page_alloc_cpu_notify, 0);
24395+ local_irq_lock_init(pa_lock);
24396 }
24397
24398 /*
24399@@ -5956,21 +6015,23 @@ static int __meminit __zone_pcp_update(void *data)
24400 {
24401 struct zone *zone = data;
24402 int cpu;
24403- unsigned long batch = zone_batchsize(zone), flags;
24404+ unsigned long flags;
24405
24406 for_each_possible_cpu(cpu) {
24407 struct per_cpu_pageset *pset;
24408 struct per_cpu_pages *pcp;
24409+ LIST_HEAD(dst);
24410
24411 pset = per_cpu_ptr(zone->pageset, cpu);
24412 pcp = &pset->pcp;
24413
24414- local_irq_save(flags);
24415- if (pcp->count > 0)
24416- free_pcppages_bulk(zone, pcp->count, pcp);
24417+ cpu_lock_irqsave(cpu, flags);
24418+ if (pcp->count > 0) {
24419+ isolate_pcp_pages(pcp->count, pcp, &dst);
24420+ free_pcppages_bulk(zone, pcp->count, &dst);
24421+ }
24422 drain_zonestat(zone, pset);
24423- setup_pageset(pset, batch);
24424- local_irq_restore(flags);
24425+ cpu_unlock_irqrestore(cpu, flags);
24426 }
24427 return 0;
24428 }
24429@@ -5988,7 +6049,7 @@ void zone_pcp_reset(struct zone *zone)
24430 struct per_cpu_pageset *pset;
24431
24432 /* avoid races with drain_pages() */
24433- local_irq_save(flags);
24434+ local_lock_irqsave(pa_lock, flags);
24435 if (zone->pageset != &boot_pageset) {
24436 for_each_online_cpu(cpu) {
24437 pset = per_cpu_ptr(zone->pageset, cpu);
24438@@ -5997,7 +6058,7 @@ void zone_pcp_reset(struct zone *zone)
24439 free_percpu(zone->pageset);
24440 zone->pageset = &boot_pageset;
24441 }
24442- local_irq_restore(flags);
24443+ local_unlock_irqrestore(pa_lock, flags);
24444 }
24445
24446 #ifdef CONFIG_MEMORY_HOTREMOVE
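
The page_alloc.c hunks above all follow one shape: the per-CPU pagesets are guarded by the local lock pa_lock instead of a bare local_irq_save(), and free_pcppages_bulk() is handed a list that was already detached by isolate_pcp_pages(), so the buddy free (which takes zone->lock) runs outside the IRQ-off / locally-locked window. A minimal sketch of that shape, reusing identifiers the patch introduces; the helper name is made up for illustration and this is not a further hunk:

static void drain_zone_pcp_sketch(struct zone *zone, struct per_cpu_pages *pcp)
{
        unsigned long flags;
        LIST_HEAD(dst);
        int count;

        /* Keep the locked window small: only detach pages here. */
        local_lock_irqsave(pa_lock, flags);
        count = pcp->count;
        if (count) {
                isolate_pcp_pages(count, pcp, &dst);
                pcp->count = 0;
        }
        local_unlock_irqrestore(pa_lock, flags);

        /* zone->lock is taken in here, with interrupts enabled again. */
        if (count)
                free_pcppages_bulk(zone, count, &dst);
}
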
24447diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
24448index 6d757e3a..98caeee 100644
24449--- a/mm/page_cgroup.c
24450+++ b/mm/page_cgroup.c
24451@@ -13,6 +13,14 @@
24452
24453 static unsigned long total_usage;
24454
24455+static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages)
24456+{
24457+#ifdef CONFIG_PREEMPT_RT_BASE
24458+ for (; nr_pages; nr_pages--, pc++)
24459+ spin_lock_init(&pc->pcg_lock);
24460+#endif
24461+}
24462+
24463 #if !defined(CONFIG_SPARSEMEM)
24464
24465
24466@@ -60,6 +68,7 @@ static int __init alloc_node_page_cgroup(int nid)
24467 return -ENOMEM;
24468 NODE_DATA(nid)->node_page_cgroup = base;
24469 total_usage += table_size;
24470+ page_cgroup_lock_init(base, nr_pages);
24471 return 0;
24472 }
24473
24474@@ -150,6 +159,8 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
24475 return -ENOMEM;
24476 }
24477
24478+ page_cgroup_lock_init(base, PAGES_PER_SECTION);
24479+
24480 /*
24481 * The passed "pfn" may not be aligned to SECTION. For the calculation
24482 * we need to apply a mask.
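
The new page_cgroup_lock_init() only matters on RT: mainline's lock_page_cgroup() is a bit spinlock inside pc->flags, which cannot become a sleeping lock, so the RT side of this patch presumably switches it (in include/linux/page_cgroup.h, not shown in this hunk) to the per-descriptor pcg_lock that the loop above initializes. Roughly, treating the header change as an assumption:

static inline void lock_page_cgroup(struct page_cgroup *pc)
{
#ifdef CONFIG_PREEMPT_RT_BASE
        spin_lock(&pc->pcg_lock);               /* real spinlock, sleeps on RT */
#else
        bit_spin_lock(PCG_LOCK, &pc->flags);    /* mainline behaviour */
#endif
}

static inline void unlock_page_cgroup(struct page_cgroup *pc)
{
#ifdef CONFIG_PREEMPT_RT_BASE
        spin_unlock(&pc->pcg_lock);
#else
        bit_spin_unlock(PCG_LOCK, &pc->flags);
#endif
}
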
24483diff --git a/mm/slab.c b/mm/slab.c
24484index e7667a3..6604ced 100644
24485--- a/mm/slab.c
24486+++ b/mm/slab.c
24487@@ -116,6 +116,7 @@
24488 #include <linux/kmemcheck.h>
24489 #include <linux/memory.h>
24490 #include <linux/prefetch.h>
24491+#include <linux/locallock.h>
24492
24493 #include <net/sock.h>
24494
24495@@ -696,12 +697,78 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
24496 #endif
24497
24498 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
24499+static DEFINE_PER_CPU(struct list_head, slab_free_list);
24500+static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
24501+
24502+#ifndef CONFIG_PREEMPT_RT_BASE
24503+# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
24504+#else
24505+/*
24506+ * execute func() for all CPUs. On PREEMPT_RT we don't actually have
24507+ * to run on the remote CPUs - we only have to take their CPU-locks.
24508+ * (This is a rare operation, so cacheline bouncing is not an issue.)
24509+ */
24510+static void
24511+slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
24512+{
24513+ unsigned int i;
24514+
24515+ get_cpu_light();
24516+ for_each_online_cpu(i)
24517+ func(arg, i);
24518+ put_cpu_light();
24519+}
24520+
24521+static void lock_slab_on(unsigned int cpu)
24522+{
24523+ local_lock_irq_on(slab_lock, cpu);
24524+}
24525+
24526+static void unlock_slab_on(unsigned int cpu)
24527+{
24528+ local_unlock_irq_on(slab_lock, cpu);
24529+}
24530+#endif
24531+
24532+static void free_delayed(struct list_head *h)
24533+{
24534+ while(!list_empty(h)) {
24535+ struct page *page = list_first_entry(h, struct page, lru);
24536+
24537+ list_del(&page->lru);
24538+ __free_pages(page, page->index);
24539+ }
24540+}
24541+
24542+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
24543+{
24544+ LIST_HEAD(tmp);
24545+
24546+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
24547+ local_spin_unlock_irq(slab_lock, list_lock);
24548+ free_delayed(&tmp);
24549+}
24550+
24551+static void unlock_slab_and_free_delayed(unsigned long flags)
24552+{
24553+ LIST_HEAD(tmp);
24554+
24555+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
24556+ local_unlock_irqrestore(slab_lock, flags);
24557+ free_delayed(&tmp);
24558+}
24559
24560 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
24561 {
24562 return cachep->array[smp_processor_id()];
24563 }
24564
24565+static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
24566+ int cpu)
24567+{
24568+ return cachep->array[cpu];
24569+}
24570+
24571 static inline struct kmem_cache *__find_general_cachep(size_t size,
24572 gfp_t gfpflags)
24573 {
24574@@ -1171,9 +1238,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
24575 if (l3->alien) {
24576 struct array_cache *ac = l3->alien[node];
24577
24578- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
24579+ if (ac && ac->avail &&
24580+ local_spin_trylock_irq(slab_lock, &ac->lock)) {
24581 __drain_alien_cache(cachep, ac, node);
24582- spin_unlock_irq(&ac->lock);
24583+ local_spin_unlock_irq(slab_lock, &ac->lock);
24584 }
24585 }
24586 }
24587@@ -1188,9 +1256,9 @@ static void drain_alien_cache(struct kmem_cache *cachep,
24588 for_each_online_node(i) {
24589 ac = alien[i];
24590 if (ac) {
24591- spin_lock_irqsave(&ac->lock, flags);
24592+ local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
24593 __drain_alien_cache(cachep, ac, i);
24594- spin_unlock_irqrestore(&ac->lock, flags);
24595+ local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
24596 }
24597 }
24598 }
24599@@ -1269,11 +1337,11 @@ static int init_cache_nodelists_node(int node)
24600 cachep->nodelists[node] = l3;
24601 }
24602
24603- spin_lock_irq(&cachep->nodelists[node]->list_lock);
24604+ local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
24605 cachep->nodelists[node]->free_limit =
24606 (1 + nr_cpus_node(node)) *
24607 cachep->batchcount + cachep->num;
24608- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
24609+ local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
24610 }
24611 return 0;
24612 }
24613@@ -1298,7 +1366,7 @@ static void __cpuinit cpuup_canceled(long cpu)
24614 if (!l3)
24615 goto free_array_cache;
24616
24617- spin_lock_irq(&l3->list_lock);
24618+ local_spin_lock_irq(slab_lock, &l3->list_lock);
24619
24620 /* Free limit for this kmem_list3 */
24621 l3->free_limit -= cachep->batchcount;
24622@@ -1306,7 +1374,7 @@ static void __cpuinit cpuup_canceled(long cpu)
24623 free_block(cachep, nc->entry, nc->avail, node);
24624
24625 if (!cpumask_empty(mask)) {
24626- spin_unlock_irq(&l3->list_lock);
24627+ unlock_l3_and_free_delayed(&l3->list_lock);
24628 goto free_array_cache;
24629 }
24630
24631@@ -1320,7 +1388,7 @@ static void __cpuinit cpuup_canceled(long cpu)
24632 alien = l3->alien;
24633 l3->alien = NULL;
24634
24635- spin_unlock_irq(&l3->list_lock);
24636+ unlock_l3_and_free_delayed(&l3->list_lock);
24637
24638 kfree(shared);
24639 if (alien) {
24640@@ -1394,7 +1462,7 @@ static int __cpuinit cpuup_prepare(long cpu)
24641 l3 = cachep->nodelists[node];
24642 BUG_ON(!l3);
24643
24644- spin_lock_irq(&l3->list_lock);
24645+ local_spin_lock_irq(slab_lock, &l3->list_lock);
24646 if (!l3->shared) {
24647 /*
24648 * We are serialised from CPU_DEAD or
24649@@ -1409,7 +1477,7 @@ static int __cpuinit cpuup_prepare(long cpu)
24650 alien = NULL;
24651 }
24652 #endif
24653- spin_unlock_irq(&l3->list_lock);
24654+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
24655 kfree(shared);
24656 free_alien_cache(alien);
24657 if (cachep->flags & SLAB_DEBUG_OBJECTS)
24658@@ -1612,6 +1680,10 @@ void __init kmem_cache_init(void)
24659 if (num_possible_nodes() == 1)
24660 use_alien_caches = 0;
24661
24662+ local_irq_lock_init(slab_lock);
24663+ for_each_possible_cpu(i)
24664+ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
24665+
24666 for (i = 0; i < NUM_INIT_LISTS; i++)
24667 kmem_list3_init(&initkmem_list3[i]);
24668
24669@@ -1912,12 +1984,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
24670 /*
24671 * Interface to system's page release.
24672 */
24673-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
24674+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
24675 {
24676 unsigned long i = (1 << cachep->gfporder);
24677- struct page *page = virt_to_page(addr);
24678+ struct page *page, *basepage = virt_to_page(addr);
24679 const unsigned long nr_freed = i;
24680
24681+ page = basepage;
24682+
24683 kmemcheck_free_shadow(page, cachep->gfporder);
24684
24685 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
24686@@ -1936,7 +2010,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
24687 memcg_release_pages(cachep, cachep->gfporder);
24688 if (current->reclaim_state)
24689 current->reclaim_state->reclaimed_slab += nr_freed;
24690- free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
24691+ if (!delayed) {
24692+ free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
24693+ } else {
24694+ basepage->index = cachep->gfporder;
24695+ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
24696+ }
24697 }
24698
24699 static void kmem_rcu_free(struct rcu_head *head)
24700@@ -1944,7 +2023,7 @@ static void kmem_rcu_free(struct rcu_head *head)
24701 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
24702 struct kmem_cache *cachep = slab_rcu->cachep;
24703
24704- kmem_freepages(cachep, slab_rcu->addr);
24705+ kmem_freepages(cachep, slab_rcu->addr, false);
24706 if (OFF_SLAB(cachep))
24707 kmem_cache_free(cachep->slabp_cache, slab_rcu);
24708 }
24709@@ -2163,7 +2242,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
24710 * Before calling the slab must have been unlinked from the cache. The
24711 * cache-lock is not held/needed.
24712 */
24713-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
24714+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
24715+ bool delayed)
24716 {
24717 void *addr = slabp->s_mem - slabp->colouroff;
24718
24719@@ -2176,7 +2256,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
24720 slab_rcu->addr = addr;
24721 call_rcu(&slab_rcu->head, kmem_rcu_free);
24722 } else {
24723- kmem_freepages(cachep, addr);
24724+ kmem_freepages(cachep, addr, delayed);
24725 if (OFF_SLAB(cachep))
24726 kmem_cache_free(cachep->slabp_cache, slabp);
24727 }
24728@@ -2533,7 +2613,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
24729 #if DEBUG
24730 static void check_irq_off(void)
24731 {
24732- BUG_ON(!irqs_disabled());
24733+ BUG_ON_NONRT(!irqs_disabled());
24734 }
24735
24736 static void check_irq_on(void)
24737@@ -2568,26 +2648,43 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
24738 struct array_cache *ac,
24739 int force, int node);
24740
24741-static void do_drain(void *arg)
24742+static void __do_drain(void *arg, unsigned int cpu)
24743 {
24744 struct kmem_cache *cachep = arg;
24745 struct array_cache *ac;
24746- int node = numa_mem_id();
24747+ int node = cpu_to_mem(cpu);
24748
24749- check_irq_off();
24750- ac = cpu_cache_get(cachep);
24751+ ac = cpu_cache_get_on_cpu(cachep, cpu);
24752 spin_lock(&cachep->nodelists[node]->list_lock);
24753 free_block(cachep, ac->entry, ac->avail, node);
24754 spin_unlock(&cachep->nodelists[node]->list_lock);
24755 ac->avail = 0;
24756 }
24757
24758+#ifndef CONFIG_PREEMPT_RT_BASE
24759+static void do_drain(void *arg)
24760+{
24761+ __do_drain(arg, smp_processor_id());
24762+}
24763+#else
24764+static void do_drain(void *arg, int cpu)
24765+{
24766+ LIST_HEAD(tmp);
24767+
24768+ lock_slab_on(cpu);
24769+ __do_drain(arg, cpu);
24770+ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
24771+ unlock_slab_on(cpu);
24772+ free_delayed(&tmp);
24773+}
24774+#endif
24775+
24776 static void drain_cpu_caches(struct kmem_cache *cachep)
24777 {
24778 struct kmem_list3 *l3;
24779 int node;
24780
24781- on_each_cpu(do_drain, cachep, 1);
24782+ slab_on_each_cpu(do_drain, cachep);
24783 check_irq_on();
24784 for_each_online_node(node) {
24785 l3 = cachep->nodelists[node];
24786@@ -2618,10 +2715,10 @@ static int drain_freelist(struct kmem_cache *cache,
24787 nr_freed = 0;
24788 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
24789
24790- spin_lock_irq(&l3->list_lock);
24791+ local_spin_lock_irq(slab_lock, &l3->list_lock);
24792 p = l3->slabs_free.prev;
24793 if (p == &l3->slabs_free) {
24794- spin_unlock_irq(&l3->list_lock);
24795+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
24796 goto out;
24797 }
24798
24799@@ -2635,8 +2732,8 @@ static int drain_freelist(struct kmem_cache *cache,
24800 * to the cache.
24801 */
24802 l3->free_objects -= cache->num;
24803- spin_unlock_irq(&l3->list_lock);
24804- slab_destroy(cache, slabp);
24805+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
24806+ slab_destroy(cache, slabp, false);
24807 nr_freed++;
24808 }
24809 out:
24810@@ -2910,7 +3007,7 @@ static int cache_grow(struct kmem_cache *cachep,
24811 offset *= cachep->colour_off;
24812
24813 if (local_flags & __GFP_WAIT)
24814- local_irq_enable();
24815+ local_unlock_irq(slab_lock);
24816
24817 /*
24818 * The test for missing atomic flag is performed here, rather than
24819@@ -2940,7 +3037,7 @@ static int cache_grow(struct kmem_cache *cachep,
24820 cache_init_objs(cachep, slabp);
24821
24822 if (local_flags & __GFP_WAIT)
24823- local_irq_disable();
24824+ local_lock_irq(slab_lock);
24825 check_irq_off();
24826 spin_lock(&l3->list_lock);
24827
24828@@ -2951,10 +3048,10 @@ static int cache_grow(struct kmem_cache *cachep,
24829 spin_unlock(&l3->list_lock);
24830 return 1;
24831 opps1:
24832- kmem_freepages(cachep, objp);
24833+ kmem_freepages(cachep, objp, false);
24834 failed:
24835 if (local_flags & __GFP_WAIT)
24836- local_irq_disable();
24837+ local_lock_irq(slab_lock);
24838 return 0;
24839 }
24840
24841@@ -3368,11 +3465,11 @@ retry:
24842 * set and go into memory reserves if necessary.
24843 */
24844 if (local_flags & __GFP_WAIT)
24845- local_irq_enable();
24846+ local_unlock_irq(slab_lock);
24847 kmem_flagcheck(cache, flags);
24848 obj = kmem_getpages(cache, local_flags, numa_mem_id());
24849 if (local_flags & __GFP_WAIT)
24850- local_irq_disable();
24851+ local_lock_irq(slab_lock);
24852 if (obj) {
24853 /*
24854 * Insert into the appropriate per node queues
24855@@ -3492,7 +3589,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
24856 cachep = memcg_kmem_get_cache(cachep, flags);
24857
24858 cache_alloc_debugcheck_before(cachep, flags);
24859- local_irq_save(save_flags);
24860+ local_lock_irqsave(slab_lock, save_flags);
24861
24862 if (nodeid == NUMA_NO_NODE)
24863 nodeid = slab_node;
24864@@ -3517,7 +3614,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
24865 /* ___cache_alloc_node can fall back to other nodes */
24866 ptr = ____cache_alloc_node(cachep, flags, nodeid);
24867 out:
24868- local_irq_restore(save_flags);
24869+ local_unlock_irqrestore(slab_lock, save_flags);
24870 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
24871 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
24872 flags);
24873@@ -3579,9 +3676,9 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
24874 cachep = memcg_kmem_get_cache(cachep, flags);
24875
24876 cache_alloc_debugcheck_before(cachep, flags);
24877- local_irq_save(save_flags);
24878+ local_lock_irqsave(slab_lock, save_flags);
24879 objp = __do_cache_alloc(cachep, flags);
24880- local_irq_restore(save_flags);
24881+ local_unlock_irqrestore(slab_lock, save_flags);
24882 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
24883 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
24884 flags);
24885@@ -3632,7 +3729,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
24886 * a different cache, refer to comments before
24887 * alloc_slabmgmt.
24888 */
24889- slab_destroy(cachep, slabp);
24890+ slab_destroy(cachep, slabp, true);
24891 } else {
24892 list_add(&slabp->list, &l3->slabs_free);
24893 }
24894@@ -3895,12 +3992,12 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
24895 if (!cachep)
24896 return;
24897
24898- local_irq_save(flags);
24899 debug_check_no_locks_freed(objp, cachep->object_size);
24900 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
24901 debug_check_no_obj_freed(objp, cachep->object_size);
24902+ local_lock_irqsave(slab_lock, flags);
24903 __cache_free(cachep, objp, _RET_IP_);
24904- local_irq_restore(flags);
24905+ unlock_slab_and_free_delayed(flags);
24906
24907 trace_kmem_cache_free(_RET_IP_, objp);
24908 }
24909@@ -3924,14 +4021,14 @@ void kfree(const void *objp)
24910
24911 if (unlikely(ZERO_OR_NULL_PTR(objp)))
24912 return;
24913- local_irq_save(flags);
24914 kfree_debugcheck(objp);
24915 c = virt_to_cache(objp);
24916 debug_check_no_locks_freed(objp, c->object_size);
24917
24918 debug_check_no_obj_freed(objp, c->object_size);
24919+ local_lock_irqsave(slab_lock, flags);
24920 __cache_free(c, (void *)objp, _RET_IP_);
24921- local_irq_restore(flags);
24922+ unlock_slab_and_free_delayed(flags);
24923 }
24924 EXPORT_SYMBOL(kfree);
24925
24926@@ -3968,7 +4065,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
24927 if (l3) {
24928 struct array_cache *shared = l3->shared;
24929
24930- spin_lock_irq(&l3->list_lock);
24931+ local_spin_lock_irq(slab_lock, &l3->list_lock);
24932
24933 if (shared)
24934 free_block(cachep, shared->entry,
24935@@ -3981,7 +4078,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
24936 }
24937 l3->free_limit = (1 + nr_cpus_node(node)) *
24938 cachep->batchcount + cachep->num;
24939- spin_unlock_irq(&l3->list_lock);
24940+ unlock_l3_and_free_delayed(&l3->list_lock);
24941+
24942 kfree(shared);
24943 free_alien_cache(new_alien);
24944 continue;
24945@@ -4028,18 +4126,29 @@ struct ccupdate_struct {
24946 struct array_cache *new[0];
24947 };
24948
24949-static void do_ccupdate_local(void *info)
24950+static void __do_ccupdate_local(void *info, int cpu)
24951 {
24952 struct ccupdate_struct *new = info;
24953 struct array_cache *old;
24954
24955- check_irq_off();
24956- old = cpu_cache_get(new->cachep);
24957+ old = cpu_cache_get_on_cpu(new->cachep, cpu);
24958
24959- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
24960- new->new[smp_processor_id()] = old;
24961+ new->cachep->array[cpu] = new->new[cpu];
24962+ new->new[cpu] = old;
24963 }
24964
24965+#ifndef CONFIG_PREEMPT_RT_BASE
24966+static void do_ccupdate_local(void *info)
24967+{
24968+ __do_ccupdate_local(info, smp_processor_id());
24969+}
24970+#else
24971+static void do_ccupdate_local(void *info, int cpu)
24972+{
24973+ __do_ccupdate_local(info, cpu);
24974+}
24975+#endif
24976+
24977 /* Always called with the slab_mutex held */
24978 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
24979 int batchcount, int shared, gfp_t gfp)
24980@@ -4064,7 +4173,7 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
24981 }
24982 new->cachep = cachep;
24983
24984- on_each_cpu(do_ccupdate_local, (void *)new, 1);
24985+ slab_on_each_cpu(do_ccupdate_local, (void *)new);
24986
24987 check_irq_on();
24988 cachep->batchcount = batchcount;
24989@@ -4075,9 +4184,11 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
24990 struct array_cache *ccold = new->new[i];
24991 if (!ccold)
24992 continue;
24993- spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
24994+ local_spin_lock_irq(slab_lock,
24995+ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
24996 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
24997- spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
24998+
24999+ unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
25000 kfree(ccold);
25001 }
25002 kfree(new);
25003@@ -4192,7 +4303,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
25004 if (ac->touched && !force) {
25005 ac->touched = 0;
25006 } else {
25007- spin_lock_irq(&l3->list_lock);
25008+ local_spin_lock_irq(slab_lock, &l3->list_lock);
25009 if (ac->avail) {
25010 tofree = force ? ac->avail : (ac->limit + 4) / 5;
25011 if (tofree > ac->avail)
25012@@ -4202,7 +4313,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
25013 memmove(ac->entry, &(ac->entry[tofree]),
25014 sizeof(void *) * ac->avail);
25015 }
25016- spin_unlock_irq(&l3->list_lock);
25017+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
25018 }
25019 }
25020
25021@@ -4295,7 +4406,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
25022 continue;
25023
25024 check_irq_on();
25025- spin_lock_irq(&l3->list_lock);
25026+ local_spin_lock_irq(slab_lock, &l3->list_lock);
25027
25028 list_for_each_entry(slabp, &l3->slabs_full, list) {
25029 if (slabp->inuse != cachep->num && !error)
25030@@ -4320,7 +4431,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
25031 if (l3->shared)
25032 shared_avail += l3->shared->avail;
25033
25034- spin_unlock_irq(&l3->list_lock);
25035+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
25036 }
25037 num_slabs += active_slabs;
25038 num_objs = num_slabs * cachep->num;
25039@@ -4520,13 +4631,13 @@ static int leaks_show(struct seq_file *m, void *p)
25040 continue;
25041
25042 check_irq_on();
25043- spin_lock_irq(&l3->list_lock);
25044+ local_spin_lock_irq(slab_lock, &l3->list_lock);
25045
25046 list_for_each_entry(slabp, &l3->slabs_full, list)
25047 handle_slab(n, cachep, slabp);
25048 list_for_each_entry(slabp, &l3->slabs_partial, list)
25049 handle_slab(n, cachep, slabp);
25050- spin_unlock_irq(&l3->list_lock);
25051+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
25052 }
25053 name = cachep->name;
25054 if (n[0] == n[1]) {
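
The SLAB conversion above has two parts: every irq-disabling section becomes a slab_lock section (a per-CPU local lock on RT), and pages released while that lock is held are no longer freed directly — kmem_freepages(..., true) parks them on the per-CPU slab_free_list and the caller frees them via free_delayed() after the lock is dropped, so the page allocator is never entered from inside the slab-locked region. Condensed, using only identifiers from the hunks above (the wrapper name is illustrative, not a new hunk):

void kmem_cache_free_sketch(struct kmem_cache *cachep, void *objp)
{
        unsigned long flags;

        local_lock_irqsave(slab_lock, flags);
        /* May reach kmem_freepages(cachep, addr, true), which only
         * queues the backing pages on this CPU's slab_free_list. */
        __cache_free(cachep, objp, _RET_IP_);
        /* Splice slab_free_list, drop slab_lock, then __free_pages(). */
        unlock_slab_and_free_delayed(flags);
}
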
25055diff --git a/mm/slub.c b/mm/slub.c
25056index ba2ca53..f6871c5 100644
25057--- a/mm/slub.c
25058+++ b/mm/slub.c
25059@@ -1070,7 +1070,7 @@ static noinline struct kmem_cache_node *free_debug_processing(
25060 {
25061 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
25062
25063- spin_lock_irqsave(&n->list_lock, *flags);
25064+ raw_spin_lock_irqsave(&n->list_lock, *flags);
25065 slab_lock(page);
25066
25067 if (!check_slab(s, page))
25068@@ -1118,7 +1118,7 @@ out:
25069
25070 fail:
25071 slab_unlock(page);
25072- spin_unlock_irqrestore(&n->list_lock, *flags);
25073+ raw_spin_unlock_irqrestore(&n->list_lock, *flags);
25074 slab_fix(s, "Object at 0x%p not freed", object);
25075 return NULL;
25076 }
25077@@ -1253,6 +1253,12 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
25078
25079 #endif /* CONFIG_SLUB_DEBUG */
25080
25081+struct slub_free_list {
25082+ raw_spinlock_t lock;
25083+ struct list_head list;
25084+};
25085+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
25086+
25087 /*
25088 * Slab allocation and freeing
25089 */
25090@@ -1274,10 +1280,15 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
25091 struct page *page;
25092 struct kmem_cache_order_objects oo = s->oo;
25093 gfp_t alloc_gfp;
25094+ bool enableirqs;
25095
25096 flags &= gfp_allowed_mask;
25097
25098- if (flags & __GFP_WAIT)
25099+ enableirqs = (flags & __GFP_WAIT) != 0;
25100+#ifdef CONFIG_PREEMPT_RT_FULL
25101+ enableirqs |= system_state == SYSTEM_RUNNING;
25102+#endif
25103+ if (enableirqs)
25104 local_irq_enable();
25105
25106 flags |= s->allocflags;
25107@@ -1317,7 +1328,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
25108 kmemcheck_mark_unallocated_pages(page, pages);
25109 }
25110
25111- if (flags & __GFP_WAIT)
25112+ if (enableirqs)
25113 local_irq_disable();
25114 if (!page)
25115 return NULL;
25116@@ -1335,8 +1346,10 @@ static void setup_object(struct kmem_cache *s, struct page *page,
25117 void *object)
25118 {
25119 setup_object_debug(s, page, object);
25120+#ifndef CONFIG_PREEMPT_RT_FULL
25121 if (unlikely(s->ctor))
25122 s->ctor(object);
25123+#endif
25124 }
25125
25126 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
25127@@ -1414,6 +1427,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
25128 __free_memcg_kmem_pages(page, order);
25129 }
25130
25131+static void free_delayed(struct kmem_cache *s, struct list_head *h)
25132+{
25133+ while(!list_empty(h)) {
25134+ struct page *page = list_first_entry(h, struct page, lru);
25135+
25136+ list_del(&page->lru);
25137+ __free_slab(s, page);
25138+ }
25139+}
25140+
25141 #define need_reserve_slab_rcu \
25142 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
25143
25144@@ -1448,6 +1471,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
25145 }
25146
25147 call_rcu(head, rcu_free_slab);
25148+ } else if (irqs_disabled()) {
25149+ struct slub_free_list *f = &__get_cpu_var(slub_free_list);
25150+
25151+ raw_spin_lock(&f->lock);
25152+ list_add(&page->lru, &f->list);
25153+ raw_spin_unlock(&f->lock);
25154 } else
25155 __free_slab(s, page);
25156 }
25157@@ -1549,7 +1578,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
25158 if (!n || !n->nr_partial)
25159 return NULL;
25160
25161- spin_lock(&n->list_lock);
25162+ raw_spin_lock(&n->list_lock);
25163 list_for_each_entry_safe(page, page2, &n->partial, lru) {
25164 void *t;
25165 int available;
25166@@ -1574,7 +1603,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
25167 break;
25168
25169 }
25170- spin_unlock(&n->list_lock);
25171+ raw_spin_unlock(&n->list_lock);
25172 return object;
25173 }
25174
25175@@ -1816,7 +1845,7 @@ redo:
25176 * that acquire_slab() will see a slab page that
25177 * is frozen
25178 */
25179- spin_lock(&n->list_lock);
25180+ raw_spin_lock(&n->list_lock);
25181 }
25182 } else {
25183 m = M_FULL;
25184@@ -1827,7 +1856,7 @@ redo:
25185 * slabs from diagnostic functions will not see
25186 * any frozen slabs.
25187 */
25188- spin_lock(&n->list_lock);
25189+ raw_spin_lock(&n->list_lock);
25190 }
25191 }
25192
25193@@ -1862,7 +1891,7 @@ redo:
25194 goto redo;
25195
25196 if (lock)
25197- spin_unlock(&n->list_lock);
25198+ raw_spin_unlock(&n->list_lock);
25199
25200 if (m == M_FREE) {
25201 stat(s, DEACTIVATE_EMPTY);
25202@@ -1893,10 +1922,10 @@ static void unfreeze_partials(struct kmem_cache *s,
25203 n2 = get_node(s, page_to_nid(page));
25204 if (n != n2) {
25205 if (n)
25206- spin_unlock(&n->list_lock);
25207+ raw_spin_unlock(&n->list_lock);
25208
25209 n = n2;
25210- spin_lock(&n->list_lock);
25211+ raw_spin_lock(&n->list_lock);
25212 }
25213
25214 do {
25215@@ -1925,7 +1954,7 @@ static void unfreeze_partials(struct kmem_cache *s,
25216 }
25217
25218 if (n)
25219- spin_unlock(&n->list_lock);
25220+ raw_spin_unlock(&n->list_lock);
25221
25222 while (discard_page) {
25223 page = discard_page;
25224@@ -1961,14 +1990,21 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
25225 pobjects = oldpage->pobjects;
25226 pages = oldpage->pages;
25227 if (drain && pobjects > s->cpu_partial) {
25228+ struct slub_free_list *f;
25229 unsigned long flags;
25230+ LIST_HEAD(tofree);
25231 /*
25232 * partial array is full. Move the existing
25233 * set to the per node partial list.
25234 */
25235 local_irq_save(flags);
25236 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
25237+ f = &__get_cpu_var(slub_free_list);
25238+ raw_spin_lock(&f->lock);
25239+ list_splice_init(&f->list, &tofree);
25240+ raw_spin_unlock(&f->lock);
25241 local_irq_restore(flags);
25242+ free_delayed(s, &tofree);
25243 oldpage = NULL;
25244 pobjects = 0;
25245 pages = 0;
25246@@ -2031,7 +2067,22 @@ static bool has_cpu_slab(int cpu, void *info)
25247
25248 static void flush_all(struct kmem_cache *s)
25249 {
25250+ LIST_HEAD(tofree);
25251+ int cpu;
25252+
25253 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
25254+ for_each_online_cpu(cpu) {
25255+ struct slub_free_list *f;
25256+
25257+ if (!has_cpu_slab(cpu, s))
25258+ continue;
25259+
25260+ f = &per_cpu(slub_free_list, cpu);
25261+ raw_spin_lock_irq(&f->lock);
25262+ list_splice_init(&f->list, &tofree);
25263+ raw_spin_unlock_irq(&f->lock);
25264+ free_delayed(s, &tofree);
25265+ }
25266 }
25267
25268 /*
25269@@ -2041,7 +2092,7 @@ static void flush_all(struct kmem_cache *s)
25270 static inline int node_match(struct page *page, int node)
25271 {
25272 #ifdef CONFIG_NUMA
25273- if (node != NUMA_NO_NODE && page_to_nid(page) != node)
25274+ if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
25275 return 0;
25276 #endif
25277 return 1;
25278@@ -2059,10 +2110,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
25279 unsigned long x = 0;
25280 struct page *page;
25281
25282- spin_lock_irqsave(&n->list_lock, flags);
25283+ raw_spin_lock_irqsave(&n->list_lock, flags);
25284 list_for_each_entry(page, &n->partial, lru)
25285 x += get_count(page);
25286- spin_unlock_irqrestore(&n->list_lock, flags);
25287+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25288 return x;
25289 }
25290
25291@@ -2205,9 +2256,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
25292 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
25293 unsigned long addr, struct kmem_cache_cpu *c)
25294 {
25295+ struct slub_free_list *f;
25296 void *freelist;
25297 struct page *page;
25298 unsigned long flags;
25299+ LIST_HEAD(tofree);
25300
25301 local_irq_save(flags);
25302 #ifdef CONFIG_PREEMPT
25303@@ -2270,7 +2323,13 @@ load_freelist:
25304 VM_BUG_ON(!c->page->frozen);
25305 c->freelist = get_freepointer(s, freelist);
25306 c->tid = next_tid(c->tid);
25307+out:
25308+ f = &__get_cpu_var(slub_free_list);
25309+ raw_spin_lock(&f->lock);
25310+ list_splice_init(&f->list, &tofree);
25311+ raw_spin_unlock(&f->lock);
25312 local_irq_restore(flags);
25313+ free_delayed(s, &tofree);
25314 return freelist;
25315
25316 new_slab:
25317@@ -2288,9 +2347,7 @@ new_slab:
25318 if (unlikely(!freelist)) {
25319 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
25320 slab_out_of_memory(s, gfpflags, node);
25321-
25322- local_irq_restore(flags);
25323- return NULL;
25324+ goto out;
25325 }
25326
25327 page = c->page;
25328@@ -2304,8 +2361,7 @@ new_slab:
25329 deactivate_slab(s, page, get_freepointer(s, freelist));
25330 c->page = NULL;
25331 c->freelist = NULL;
25332- local_irq_restore(flags);
25333- return freelist;
25334+ goto out;
25335 }
25336
25337 /*
25338@@ -2331,13 +2387,13 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
25339
25340 s = memcg_kmem_get_cache(s, gfpflags);
25341 redo:
25342-
25343 /*
25344- * Must read kmem_cache cpu data via this cpu ptr. Preemption is
25345- * enabled. We may switch back and forth between cpus while
25346- * reading from one cpu area. That does not matter as long
25347- * as we end up on the original cpu again when doing the cmpxchg.
25348+ * Preemption is disabled for the retrieval of the tid because that
25349+ * must occur from the current processor. We cannot allow rescheduling
25350+ * on a different processor between the determination of the pointer
25351+ * and the retrieval of the tid.
25352 */
25353+ preempt_disable();
25354 c = __this_cpu_ptr(s->cpu_slab);
25355
25356 /*
25357@@ -2347,7 +2403,7 @@ redo:
25358 * linked list in between.
25359 */
25360 tid = c->tid;
25361- barrier();
25362+ preempt_enable();
25363
25364 object = c->freelist;
25365 page = c->page;
25366@@ -2383,6 +2439,10 @@ redo:
25367
25368 if (unlikely(gfpflags & __GFP_ZERO) && object)
25369 memset(object, 0, s->object_size);
25370+#ifdef CONFIG_PREEMPT_RT_FULL
25371+ if (unlikely(s->ctor) && object)
25372+ s->ctor(object);
25373+#endif
25374
25375 slab_post_alloc_hook(s, gfpflags, object);
25376
25377@@ -2477,7 +2537,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
25378
25379 do {
25380 if (unlikely(n)) {
25381- spin_unlock_irqrestore(&n->list_lock, flags);
25382+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25383 n = NULL;
25384 }
25385 prior = page->freelist;
25386@@ -2507,7 +2567,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
25387 * Otherwise the list_lock will synchronize with
25388 * other processors updating the list of slabs.
25389 */
25390- spin_lock_irqsave(&n->list_lock, flags);
25391+ raw_spin_lock_irqsave(&n->list_lock, flags);
25392
25393 }
25394 }
25395@@ -2548,7 +2608,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
25396 add_partial(n, page, DEACTIVATE_TO_TAIL);
25397 stat(s, FREE_ADD_PARTIAL);
25398 }
25399- spin_unlock_irqrestore(&n->list_lock, flags);
25400+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25401 return;
25402
25403 slab_empty:
25404@@ -2562,7 +2622,7 @@ slab_empty:
25405 /* Slab must be on the full list */
25406 remove_full(s, page);
25407
25408- spin_unlock_irqrestore(&n->list_lock, flags);
25409+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25410 stat(s, FREE_SLAB);
25411 discard_slab(s, page);
25412 }
25413@@ -2594,10 +2654,11 @@ redo:
25414 * data is retrieved via this pointer. If we are on the same cpu
25415 * during the cmpxchg then the free will succeed.
25416 */
25417+ preempt_disable();
25418 c = __this_cpu_ptr(s->cpu_slab);
25419
25420 tid = c->tid;
25421- barrier();
25422+ preempt_enable();
25423
25424 if (likely(page == c->page)) {
25425 set_freepointer(s, object, c->freelist);
25426@@ -2763,7 +2824,7 @@ static void
25427 init_kmem_cache_node(struct kmem_cache_node *n)
25428 {
25429 n->nr_partial = 0;
25430- spin_lock_init(&n->list_lock);
25431+ raw_spin_lock_init(&n->list_lock);
25432 INIT_LIST_HEAD(&n->partial);
25433 #ifdef CONFIG_SLUB_DEBUG
25434 atomic_long_set(&n->nr_slabs, 0);
25435@@ -3450,7 +3511,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
25436 for (i = 0; i < objects; i++)
25437 INIT_LIST_HEAD(slabs_by_inuse + i);
25438
25439- spin_lock_irqsave(&n->list_lock, flags);
25440+ raw_spin_lock_irqsave(&n->list_lock, flags);
25441
25442 /*
25443 * Build lists indexed by the items in use in each slab.
25444@@ -3471,7 +3532,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
25445 for (i = objects - 1; i > 0; i--)
25446 list_splice(slabs_by_inuse + i, n->partial.prev);
25447
25448- spin_unlock_irqrestore(&n->list_lock, flags);
25449+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25450
25451 /* Release empty slabs */
25452 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
25453@@ -3641,6 +3702,12 @@ void __init kmem_cache_init(void)
25454 boot_kmem_cache_node;
25455 int i;
25456 int caches = 2;
25457+ int cpu;
25458+
25459+ for_each_possible_cpu(cpu) {
25460+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
25461+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
25462+ }
25463
25464 if (debug_guardpage_minorder())
25465 slub_max_order = 0;
25466@@ -4032,7 +4099,7 @@ static int validate_slab_node(struct kmem_cache *s,
25467 struct page *page;
25468 unsigned long flags;
25469
25470- spin_lock_irqsave(&n->list_lock, flags);
25471+ raw_spin_lock_irqsave(&n->list_lock, flags);
25472
25473 list_for_each_entry(page, &n->partial, lru) {
25474 validate_slab_slab(s, page, map);
25475@@ -4055,7 +4122,7 @@ static int validate_slab_node(struct kmem_cache *s,
25476 atomic_long_read(&n->nr_slabs));
25477
25478 out:
25479- spin_unlock_irqrestore(&n->list_lock, flags);
25480+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25481 return count;
25482 }
25483
25484@@ -4245,12 +4312,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
25485 if (!atomic_long_read(&n->nr_slabs))
25486 continue;
25487
25488- spin_lock_irqsave(&n->list_lock, flags);
25489+ raw_spin_lock_irqsave(&n->list_lock, flags);
25490 list_for_each_entry(page, &n->partial, lru)
25491 process_slab(&t, s, page, alloc, map);
25492 list_for_each_entry(page, &n->full, lru)
25493 process_slab(&t, s, page, alloc, map);
25494- spin_unlock_irqrestore(&n->list_lock, flags);
25495+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
25496 }
25497
25498 for (i = 0; i < t.count; i++) {
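
SLUB gets the same treatment with different plumbing: kmem_cache_node->list_lock becomes a raw spinlock, and slabs that would otherwise be freed with interrupts disabled are parked on the per-CPU slub_free_list (see free_slab() above); __slab_alloc(), put_cpu_partial() and flush_all() then splice that list and call free_delayed() once interrupts are back on. In __slab_alloc() the splice still happens with interrupts off and free_delayed() runs after local_irq_restore(); the sketch below condenses that into one helper, meant to be called with interrupts enabled, and its name is made up for illustration:

static void drain_slub_free_list(struct kmem_cache *s)
{
        struct slub_free_list *f = &__get_cpu_var(slub_free_list);
        LIST_HEAD(tofree);

        raw_spin_lock_irq(&f->lock);
        list_splice_init(&f->list, &tofree);
        raw_spin_unlock_irq(&f->lock);

        /* __free_slab() may take sleeping locks on RT, hence the split. */
        free_delayed(s, &tofree);
}
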
25499diff --git a/mm/swap.c b/mm/swap.c
25500index 6310dc2..5812f96 100644
25501--- a/mm/swap.c
25502+++ b/mm/swap.c
25503@@ -30,6 +30,7 @@
25504 #include <linux/backing-dev.h>
25505 #include <linux/memcontrol.h>
25506 #include <linux/gfp.h>
25507+#include <linux/locallock.h>
25508
25509 #include "internal.h"
25510
25511@@ -40,6 +41,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
25512 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
25513 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
25514
25515+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
25516+static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
25517+
25518 /*
25519 * This path almost never happens for VM activity - pages are normally
25520 * freed via pagevecs. But it gets used by networking.
25521@@ -354,11 +358,11 @@ void rotate_reclaimable_page(struct page *page)
25522 unsigned long flags;
25523
25524 page_cache_get(page);
25525- local_irq_save(flags);
25526+ local_lock_irqsave(rotate_lock, flags);
25527 pvec = &__get_cpu_var(lru_rotate_pvecs);
25528 if (!pagevec_add(pvec, page))
25529 pagevec_move_tail(pvec);
25530- local_irq_restore(flags);
25531+ local_unlock_irqrestore(rotate_lock, flags);
25532 }
25533 }
25534
25535@@ -403,12 +407,13 @@ static void activate_page_drain(int cpu)
25536 void activate_page(struct page *page)
25537 {
25538 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
25539- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
25540+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
25541+ activate_page_pvecs);
25542
25543 page_cache_get(page);
25544 if (!pagevec_add(pvec, page))
25545 pagevec_lru_move_fn(pvec, __activate_page, NULL);
25546- put_cpu_var(activate_page_pvecs);
25547+ put_locked_var(swapvec_lock, activate_page_pvecs);
25548 }
25549 }
25550
25551@@ -456,13 +461,13 @@ EXPORT_SYMBOL(mark_page_accessed);
25552 */
25553 void __lru_cache_add(struct page *page, enum lru_list lru)
25554 {
25555- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
25556+ struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvecs)[lru];
25557
25558 page_cache_get(page);
25559 if (!pagevec_space(pvec))
25560 __pagevec_lru_add(pvec, lru);
25561 pagevec_add(pvec, page);
25562- put_cpu_var(lru_add_pvecs);
25563+ put_locked_var(swapvec_lock, lru_add_pvecs);
25564 }
25565 EXPORT_SYMBOL(__lru_cache_add);
25566
25567@@ -597,9 +602,9 @@ void lru_add_drain_cpu(int cpu)
25568 unsigned long flags;
25569
25570 /* No harm done if a racing interrupt already did this */
25571- local_irq_save(flags);
25572+ local_lock_irqsave(rotate_lock, flags);
25573 pagevec_move_tail(pvec);
25574- local_irq_restore(flags);
25575+ local_unlock_irqrestore(rotate_lock, flags);
25576 }
25577
25578 pvec = &per_cpu(lru_deactivate_pvecs, cpu);
25579@@ -627,18 +632,19 @@ void deactivate_page(struct page *page)
25580 return;
25581
25582 if (likely(get_page_unless_zero(page))) {
25583- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
25584+ struct pagevec *pvec = &get_locked_var(swapvec_lock,
25585+ lru_deactivate_pvecs);
25586
25587 if (!pagevec_add(pvec, page))
25588 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
25589- put_cpu_var(lru_deactivate_pvecs);
25590+ put_locked_var(swapvec_lock, lru_deactivate_pvecs);
25591 }
25592 }
25593
25594 void lru_add_drain(void)
25595 {
25596- lru_add_drain_cpu(get_cpu());
25597- put_cpu();
25598+ lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
25599+ local_unlock_cpu(swapvec_lock);
25600 }
25601
25602 static void lru_add_drain_per_cpu(struct work_struct *dummy)
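
The pagevec changes swap get_cpu_var()/put_cpu_var() for get_locked_var()/put_locked_var() on swapvec_lock (and rotate_lock for the interrupt-driven rotate path). Those accessors come from include/linux/locallock.h, added elsewhere in this patch; roughly, and quoted from memory rather than from a hunk shown here, they look like the following — on !RT this degrades to exactly the old preempt-disabled access, while on RT it takes a per-CPU lock and the section stays preemptible:

#define get_locked_var(lvar, var)                       \
        (*({                                            \
                local_lock(lvar);                       \
                &__get_cpu_var(var);                    \
        }))

#define put_locked_var(lvar, var)       local_unlock(lvar)
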
25603diff --git a/mm/vmalloc.c b/mm/vmalloc.c
25604index 5123a16..f4b4fee 100644
25605--- a/mm/vmalloc.c
25606+++ b/mm/vmalloc.c
25607@@ -782,7 +782,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
25608 struct vmap_block *vb;
25609 struct vmap_area *va;
25610 unsigned long vb_idx;
25611- int node, err;
25612+ int node, err, cpu;
25613
25614 node = numa_node_id();
25615
25616@@ -821,12 +821,13 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
25617 BUG_ON(err);
25618 radix_tree_preload_end();
25619
25620- vbq = &get_cpu_var(vmap_block_queue);
25621+ cpu = get_cpu_light();
25622+ vbq = &__get_cpu_var(vmap_block_queue);
25623 vb->vbq = vbq;
25624 spin_lock(&vbq->lock);
25625 list_add_rcu(&vb->free_list, &vbq->free);
25626 spin_unlock(&vbq->lock);
25627- put_cpu_var(vmap_block_queue);
25628+ put_cpu_light();
25629
25630 return vb;
25631 }
25632@@ -900,7 +901,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
25633 struct vmap_block *vb;
25634 unsigned long addr = 0;
25635 unsigned int order;
25636- int purge = 0;
25637+ int purge = 0, cpu;
25638
25639 BUG_ON(size & ~PAGE_MASK);
25640 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
25641@@ -916,7 +917,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
25642
25643 again:
25644 rcu_read_lock();
25645- vbq = &get_cpu_var(vmap_block_queue);
25646+ cpu = get_cpu_light();
25647+ vbq = &__get_cpu_var(vmap_block_queue);
25648 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
25649 int i;
25650
25651@@ -953,7 +955,7 @@ next:
25652 if (purge)
25653 purge_fragmented_blocks_thiscpu();
25654
25655- put_cpu_var(vmap_block_queue);
25656+ put_cpu_light();
25657 rcu_read_unlock();
25658
25659 if (!addr) {
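
Here the per-CPU vmap_block_queue is already serialized by its own vbq->lock spinlock, so the patch only needs to keep the task on one CPU while it dereferences the per-CPU pointer; get_cpu_light()/put_cpu_light() provide that. Their definitions live elsewhere in this patch (include/linux/smp.h); approximately, and as an assumption about that hunk:

#ifndef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()        get_cpu()       /* preempt_disable() based */
# define put_cpu_light()        put_cpu()
#else
/* Pin to the current CPU without disabling preemption. */
# define get_cpu_light()        ({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()        migrate_enable()
#endif
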
25660diff --git a/mm/vmstat.c b/mm/vmstat.c
25661index 9800306..a4dbd77 100644
25662--- a/mm/vmstat.c
25663+++ b/mm/vmstat.c
25664@@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
25665 long x;
25666 long t;
25667
25668+ preempt_disable_rt();
25669 x = delta + __this_cpu_read(*p);
25670
25671 t = __this_cpu_read(pcp->stat_threshold);
25672@@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
25673 x = 0;
25674 }
25675 __this_cpu_write(*p, x);
25676+ preempt_enable_rt();
25677 }
25678 EXPORT_SYMBOL(__mod_zone_page_state);
25679
25680@@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
25681 s8 __percpu *p = pcp->vm_stat_diff + item;
25682 s8 v, t;
25683
25684+ preempt_disable_rt();
25685 v = __this_cpu_inc_return(*p);
25686 t = __this_cpu_read(pcp->stat_threshold);
25687 if (unlikely(v > t)) {
25688@@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
25689 zone_page_state_add(v + overstep, zone, item);
25690 __this_cpu_write(*p, -overstep);
25691 }
25692+ preempt_enable_rt();
25693 }
25694
25695 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
25696@@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
25697 s8 __percpu *p = pcp->vm_stat_diff + item;
25698 s8 v, t;
25699
25700+ preempt_disable_rt();
25701 v = __this_cpu_dec_return(*p);
25702 t = __this_cpu_read(pcp->stat_threshold);
25703 if (unlikely(v < - t)) {
25704@@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
25705 zone_page_state_add(v - overstep, zone, item);
25706 __this_cpu_write(*p, overstep);
25707 }
25708+ preempt_enable_rt();
25709 }
25710
25711 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
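
The vmstat counters are per-CPU read-modify-write sequences that mainline only reaches with interrupts or preemption already off; after the RT conversion several callers reach them preemptibly, so preempt_disable_rt()/preempt_enable_rt() pin the task for just these few lines. The helpers are defined elsewhere in this patch (include/linux/preempt.h); roughly, as an assumption about that hunk:

#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()   preempt_disable()
# define preempt_enable_rt()    preempt_enable()
#else
# define preempt_disable_rt()   barrier()       /* no-op off RT */
# define preempt_enable_rt()    barrier()
#endif
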
25712diff --git a/net/core/dev.c b/net/core/dev.c
25713index d592214..2593a482 100644
25714--- a/net/core/dev.c
25715+++ b/net/core/dev.c
25716@@ -203,7 +203,7 @@ static struct list_head offload_base __read_mostly;
25717 DEFINE_RWLOCK(dev_base_lock);
25718 EXPORT_SYMBOL(dev_base_lock);
25719
25720-seqcount_t devnet_rename_seq;
25721+DEFINE_MUTEX(devnet_rename_mutex);
25722
25723 static inline void dev_base_seq_inc(struct net *net)
25724 {
25725@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
25726 static inline void rps_lock(struct softnet_data *sd)
25727 {
25728 #ifdef CONFIG_RPS
25729- spin_lock(&sd->input_pkt_queue.lock);
25730+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
25731 #endif
25732 }
25733
25734 static inline void rps_unlock(struct softnet_data *sd)
25735 {
25736 #ifdef CONFIG_RPS
25737- spin_unlock(&sd->input_pkt_queue.lock);
25738+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
25739 #endif
25740 }
25741
25742@@ -1093,10 +1093,11 @@ int dev_change_name(struct net_device *dev, const char *newname)
25743 if (dev->flags & IFF_UP)
25744 return -EBUSY;
25745
25746- write_seqcount_begin(&devnet_rename_seq);
25747+
25748+ mutex_lock(&devnet_rename_mutex);
25749
25750 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
25751- write_seqcount_end(&devnet_rename_seq);
25752+ mutex_unlock(&devnet_rename_mutex);
25753 return 0;
25754 }
25755
25756@@ -1104,7 +1105,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
25757
25758 err = dev_get_valid_name(net, dev, newname);
25759 if (err < 0) {
25760- write_seqcount_end(&devnet_rename_seq);
25761+ mutex_unlock(&devnet_rename_mutex);
25762 return err;
25763 }
25764
25765@@ -1112,11 +1113,11 @@ rollback:
25766 ret = device_rename(&dev->dev, dev->name);
25767 if (ret) {
25768 memcpy(dev->name, oldname, IFNAMSIZ);
25769- write_seqcount_end(&devnet_rename_seq);
25770+ mutex_unlock(&devnet_rename_mutex);
25771 return ret;
25772 }
25773
25774- write_seqcount_end(&devnet_rename_seq);
25775+ mutex_unlock(&devnet_rename_mutex);
25776
25777 write_lock_bh(&dev_base_lock);
25778 hlist_del_rcu(&dev->name_hlist);
25779@@ -1135,7 +1136,7 @@ rollback:
25780 /* err >= 0 after dev_alloc_name() or stores the first errno */
25781 if (err >= 0) {
25782 err = ret;
25783- write_seqcount_begin(&devnet_rename_seq);
25784+ mutex_lock(&devnet_rename_mutex);
25785 memcpy(dev->name, oldname, IFNAMSIZ);
25786 goto rollback;
25787 } else {
25788@@ -1946,6 +1947,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
25789 sd->output_queue_tailp = &q->next_sched;
25790 raise_softirq_irqoff(NET_TX_SOFTIRQ);
25791 local_irq_restore(flags);
25792+ preempt_check_resched_rt();
25793 }
25794
25795 void __netif_schedule(struct Qdisc *q)
25796@@ -1967,6 +1969,7 @@ void dev_kfree_skb_irq(struct sk_buff *skb)
25797 sd->completion_queue = skb;
25798 raise_softirq_irqoff(NET_TX_SOFTIRQ);
25799 local_irq_restore(flags);
25800+ preempt_check_resched_rt();
25801 }
25802 }
25803 EXPORT_SYMBOL(dev_kfree_skb_irq);
25804@@ -3055,6 +3058,7 @@ enqueue:
25805 rps_unlock(sd);
25806
25807 local_irq_restore(flags);
25808+ preempt_check_resched_rt();
25809
25810 atomic_long_inc(&skb->dev->rx_dropped);
25811 kfree_skb(skb);
25812@@ -3092,7 +3096,7 @@ int netif_rx(struct sk_buff *skb)
25813 struct rps_dev_flow voidflow, *rflow = &voidflow;
25814 int cpu;
25815
25816- preempt_disable();
25817+ migrate_disable();
25818 rcu_read_lock();
25819
25820 cpu = get_rps_cpu(skb->dev, skb, &rflow);
25821@@ -3102,13 +3106,13 @@ int netif_rx(struct sk_buff *skb)
25822 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
25823
25824 rcu_read_unlock();
25825- preempt_enable();
25826+ migrate_enable();
25827 } else
25828 #endif
25829 {
25830 unsigned int qtail;
25831- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
25832- put_cpu();
25833+ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
25834+ put_cpu_light();
25835 }
25836 return ret;
25837 }
25838@@ -3118,16 +3122,44 @@ int netif_rx_ni(struct sk_buff *skb)
25839 {
25840 int err;
25841
25842- preempt_disable();
25843+ local_bh_disable();
25844 err = netif_rx(skb);
25845- if (local_softirq_pending())
25846- do_softirq();
25847- preempt_enable();
25848+ local_bh_enable();
25849
25850 return err;
25851 }
25852 EXPORT_SYMBOL(netif_rx_ni);
25853
25854+#ifdef CONFIG_PREEMPT_RT_FULL
25855+/*
25856+ * RT runs ksoftirqd as a real time thread and the root_lock is a
25857+ * "sleeping spinlock". If the trylock fails then we can go into an
25858+ * infinite loop when ksoftirqd preempted the task which actually
25859+ * holds the lock, because we requeue q and raise NET_TX softirq
25860+ * causing ksoftirqd to loop forever.
25861+ *
25862+ * It's safe to use spin_lock on RT here as softirqs run in thread
25863+ * context and cannot deadlock against the thread which is holding
25864+ * root_lock.
25865+ *
25866+ * On !RT the trylock might fail, but there we bail out from the
25867+ * softirq loop after 10 attempts which we can't do on RT. And the
25868+ * task holding root_lock cannot be preempted, so the only downside of
25869+ * that trylock is that we need 10 loops to decide that we should have
25870+ * given up in the first one :)
25871+ */
25872+static inline int take_root_lock(spinlock_t *lock)
25873+{
25874+ spin_lock(lock);
25875+ return 1;
25876+}
25877+#else
25878+static inline int take_root_lock(spinlock_t *lock)
25879+{
25880+ return spin_trylock(lock);
25881+}
25882+#endif
25883+
25884 static void net_tx_action(struct softirq_action *h)
25885 {
25886 struct softnet_data *sd = &__get_cpu_var(softnet_data);
25887@@ -3166,7 +3198,7 @@ static void net_tx_action(struct softirq_action *h)
25888 head = head->next_sched;
25889
25890 root_lock = qdisc_lock(q);
25891- if (spin_trylock(root_lock)) {
25892+ if (take_root_lock(root_lock)) {
25893 smp_mb__before_clear_bit();
25894 clear_bit(__QDISC_STATE_SCHED,
25895 &q->state);
25896@@ -3537,7 +3569,7 @@ static void flush_backlog(void *arg)
25897 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
25898 if (skb->dev == dev) {
25899 __skb_unlink(skb, &sd->input_pkt_queue);
25900- kfree_skb(skb);
25901+ __skb_queue_tail(&sd->tofree_queue, skb);
25902 input_queue_head_incr(sd);
25903 }
25904 }
25905@@ -3546,10 +3578,13 @@ static void flush_backlog(void *arg)
25906 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
25907 if (skb->dev == dev) {
25908 __skb_unlink(skb, &sd->process_queue);
25909- kfree_skb(skb);
25910+ __skb_queue_tail(&sd->tofree_queue, skb);
25911 input_queue_head_incr(sd);
25912 }
25913 }
25914+
25915+ if (!skb_queue_empty(&sd->tofree_queue))
25916+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
25917 }
25918
25919 static int napi_gro_complete(struct sk_buff *skb)
25920@@ -3908,6 +3943,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
25921 } else
25922 #endif
25923 local_irq_enable();
25924+ preempt_check_resched_rt();
25925 }
25926
25927 static int process_backlog(struct napi_struct *napi, int quota)
25928@@ -3980,6 +4016,7 @@ void __napi_schedule(struct napi_struct *n)
25929 local_irq_save(flags);
25930 ____napi_schedule(&__get_cpu_var(softnet_data), n);
25931 local_irq_restore(flags);
25932+ preempt_check_resched_rt();
25933 }
25934 EXPORT_SYMBOL(__napi_schedule);
25935
25936@@ -4054,10 +4091,17 @@ static void net_rx_action(struct softirq_action *h)
25937 struct softnet_data *sd = &__get_cpu_var(softnet_data);
25938 unsigned long time_limit = jiffies + 2;
25939 int budget = netdev_budget;
25940+ struct sk_buff *skb;
25941 void *have;
25942
25943 local_irq_disable();
25944
25945+ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
25946+ local_irq_enable();
25947+ kfree_skb(skb);
25948+ local_irq_disable();
25949+ }
25950+
25951 while (!list_empty(&sd->poll_list)) {
25952 struct napi_struct *n;
25953 int work, weight;
25954@@ -4180,7 +4224,6 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
25955 {
25956 struct net_device *dev;
25957 struct ifreq ifr;
25958- unsigned seq;
25959
25960 /*
25961 * Fetch the caller's info block.
25962@@ -4189,19 +4232,18 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
25963 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
25964 return -EFAULT;
25965
25966-retry:
25967- seq = read_seqcount_begin(&devnet_rename_seq);
25968+ mutex_lock(&devnet_rename_mutex);
25969 rcu_read_lock();
25970 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
25971 if (!dev) {
25972 rcu_read_unlock();
25973+ mutex_unlock(&devnet_rename_mutex);
25974 return -ENODEV;
25975 }
25976
25977 strcpy(ifr.ifr_name, dev->name);
25978 rcu_read_unlock();
25979- if (read_seqcount_retry(&devnet_rename_seq, seq))
25980- goto retry;
25981+ mutex_unlock(&devnet_rename_mutex);
25982
25983 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
25984 return -EFAULT;
25985@@ -6529,6 +6571,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
25986
25987 raise_softirq_irqoff(NET_TX_SOFTIRQ);
25988 local_irq_enable();
25989+ preempt_check_resched_rt();
25990
25991 /* Process offline CPU's input_pkt_queue */
25992 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
25993@@ -6539,6 +6582,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
25994 netif_rx(skb);
25995 input_queue_head_incr(oldsd);
25996 }
25997+ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
25998+ kfree_skb(skb);
25999+ }
26000
26001 return NOTIFY_OK;
26002 }
26003@@ -6811,8 +6857,9 @@ static int __init net_dev_init(void)
26004 struct softnet_data *sd = &per_cpu(softnet_data, i);
26005
26006 memset(sd, 0, sizeof(*sd));
26007- skb_queue_head_init(&sd->input_pkt_queue);
26008- skb_queue_head_init(&sd->process_queue);
26009+ skb_queue_head_init_raw(&sd->input_pkt_queue);
26010+ skb_queue_head_init_raw(&sd->process_queue);
26011+ skb_queue_head_init_raw(&sd->tofree_queue);
26012 sd->completion_queue = NULL;
26013 INIT_LIST_HEAD(&sd->poll_list);
26014 sd->output_queue = NULL;
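
Several independent RT fixes meet in net/core/dev.c; the one that adds new machinery is sd->tofree_queue. flush_backlog() runs under the backlog queue's raw lock, where kfree_skb() must not be called because it takes non-raw (on RT, sleeping) locks, so it only moves matching skbs onto tofree_queue and raises NET_RX_SOFTIRQ; net_rx_action() later frees them with interrupts enabled. The loop it adds, pulled out as a stand-alone sketch with the same field names (the helper name is illustrative):

static void drain_tofree_queue(struct softnet_data *sd)
{
        struct sk_buff *skb;

        local_irq_disable();
        while ((skb = __skb_dequeue(&sd->tofree_queue))) {
                local_irq_enable();
                kfree_skb(skb);         /* may take sleeping locks on RT */
                local_irq_disable();
        }
        local_irq_enable();
}
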
26015diff --git a/net/core/scm.c b/net/core/scm.c
26016index 2dc6cda..7b6d0d9 100644
26017--- a/net/core/scm.c
26018+++ b/net/core/scm.c
26019@@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds)
26020 return -EINVAL;
26021
26022 if ((creds->pid == task_tgid_vnr(current) ||
26023- ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
26024+ ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
26025 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
26026 uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
26027 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
26028diff --git a/net/core/skbuff.c b/net/core/skbuff.c
26029index 32443eb..39b45c0 100644
26030--- a/net/core/skbuff.c
26031+++ b/net/core/skbuff.c
26032@@ -60,6 +60,7 @@
26033 #include <linux/scatterlist.h>
26034 #include <linux/errqueue.h>
26035 #include <linux/prefetch.h>
26036+#include <linux/locallock.h>
26037
26038 #include <net/protocol.h>
26039 #include <net/dst.h>
26040@@ -347,6 +348,7 @@ struct netdev_alloc_cache {
26041 unsigned int pagecnt_bias;
26042 };
26043 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
26044+static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
26045
26046 #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
26047 #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
26048@@ -359,7 +361,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
26049 int order;
26050 unsigned long flags;
26051
26052- local_irq_save(flags);
26053+ local_lock_irqsave(netdev_alloc_lock, flags);
26054 nc = &__get_cpu_var(netdev_alloc_cache);
26055 if (unlikely(!nc->frag.page)) {
26056 refill:
26057@@ -393,7 +395,7 @@ recycle:
26058 nc->frag.offset += fragsz;
26059 nc->pagecnt_bias--;
26060 end:
26061- local_irq_restore(flags);
26062+ local_unlock_irqrestore(netdev_alloc_lock, flags);
26063 return data;
26064 }
26065
26066diff --git a/net/core/sock.c b/net/core/sock.c
26067index bc131d4..2754c99 100644
26068--- a/net/core/sock.c
26069+++ b/net/core/sock.c
26070@@ -571,7 +571,6 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
26071 struct net *net = sock_net(sk);
26072 struct net_device *dev;
26073 char devname[IFNAMSIZ];
26074- unsigned seq;
26075
26076 if (sk->sk_bound_dev_if == 0) {
26077 len = 0;
26078@@ -582,20 +581,19 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
26079 if (len < IFNAMSIZ)
26080 goto out;
26081
26082-retry:
26083- seq = read_seqcount_begin(&devnet_rename_seq);
26084+ mutex_lock(&devnet_rename_mutex);
26085 rcu_read_lock();
26086 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
26087 ret = -ENODEV;
26088 if (!dev) {
26089 rcu_read_unlock();
26090+ mutex_unlock(&devnet_rename_mutex);
26091 goto out;
26092 }
26093
26094 strcpy(devname, dev->name);
26095 rcu_read_unlock();
26096- if (read_seqcount_retry(&devnet_rename_seq, seq))
26097- goto retry;
26098+ mutex_unlock(&devnet_rename_mutex);
26099
26100 len = strlen(devname) + 1;
26101
26102@@ -2287,12 +2285,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
26103 if (sk->sk_lock.owned)
26104 __lock_sock(sk);
26105 sk->sk_lock.owned = 1;
26106- spin_unlock(&sk->sk_lock.slock);
26107+ spin_unlock_bh(&sk->sk_lock.slock);
26108 /*
26109 * The sk_lock has mutex_lock() semantics here:
26110 */
26111 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
26112- local_bh_enable();
26113 }
26114 EXPORT_SYMBOL(lock_sock_nested);
26115
26116diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
26117index 3ac5dff..d8bbe94 100644
26118--- a/net/ipv4/icmp.c
26119+++ b/net/ipv4/icmp.c
26120@@ -69,6 +69,7 @@
26121 #include <linux/jiffies.h>
26122 #include <linux/kernel.h>
26123 #include <linux/fcntl.h>
26124+#include <linux/sysrq.h>
26125 #include <linux/socket.h>
26126 #include <linux/in.h>
26127 #include <linux/inet.h>
26128@@ -768,6 +769,30 @@ static void icmp_redirect(struct sk_buff *skb)
26129 }
26130
26131 /*
26132+ * 32bit and 64bit have different timestamp length, so we check for
26133+ * the cookie at offset 20 and verify it is repeated at offset 50
26134+ */
26135+#define CO_POS0 20
26136+#define CO_POS1 50
26137+#define CO_SIZE sizeof(int)
26138+#define ICMP_SYSRQ_SIZE 57
26139+
26140+/*
26141+ * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
26142+ * pattern and if it matches send the next byte as a trigger to sysrq.
26143+ */
26144+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
26145+{
26146+ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
26147+ char *p = skb->data;
26148+
26149+ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
26150+ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
26151+ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
26152+ handle_sysrq(p[CO_POS0 + CO_SIZE]);
26153+}
26154+
26155+/*
26156 * Handle ICMP_ECHO ("ping") requests.
26157 *
26158 * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
26159@@ -794,6 +819,11 @@ static void icmp_echo(struct sk_buff *skb)
26160 icmp_param.data_len = skb->len;
26161 icmp_param.head_len = sizeof(struct icmphdr);
26162 icmp_reply(&icmp_param, skb);
26163+
26164+ if (skb->len == ICMP_SYSRQ_SIZE &&
26165+ net->ipv4.sysctl_icmp_echo_sysrq) {
26166+ icmp_check_sysrq(net, skb);
26167+ }
26168 }
26169 }
26170
26171diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
26172index 3e98ed2..253692b 100644
26173--- a/net/ipv4/ip_output.c
26174+++ b/net/ipv4/ip_output.c
26175@@ -1508,7 +1508,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
26176 if (IS_ERR(rt))
26177 return;
26178
26179- inet = &get_cpu_var(unicast_sock);
26180+ get_cpu_light();
26181+ inet = &__get_cpu_var(unicast_sock);
26182
26183 inet->tos = arg->tos;
26184 sk = &inet->sk;
26185@@ -1532,7 +1533,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
26186 ip_push_pending_frames(sk, &fl4);
26187 }
26188
26189- put_cpu_var(unicast_sock);
26190+ put_cpu_light();
26191
26192 ip_rt_put(rt);
26193 }
26194diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
26195index d84400b..44bf3b0 100644
26196--- a/net/ipv4/sysctl_net_ipv4.c
26197+++ b/net/ipv4/sysctl_net_ipv4.c
26198@@ -815,6 +815,13 @@ static struct ctl_table ipv4_net_table[] = {
26199 .proc_handler = proc_dointvec
26200 },
26201 {
26202+ .procname = "icmp_echo_sysrq",
26203+ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
26204+ .maxlen = sizeof(int),
26205+ .mode = 0644,
26206+ .proc_handler = proc_dointvec
26207+ },
26208+ {
26209 .procname = "icmp_ignore_bogus_error_responses",
26210 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
26211 .maxlen = sizeof(int),
26212diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
26213index 580704e..c58f3cd 100644
26214--- a/net/mac80211/rx.c
26215+++ b/net/mac80211/rx.c
26216@@ -3144,7 +3144,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
26217 struct ieee80211_supported_band *sband;
26218 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
26219
26220- WARN_ON_ONCE(softirq_count() == 0);
26221+ WARN_ON_ONCE_NONRT(softirq_count() == 0);
26222
26223 if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
26224 goto drop;
26225diff --git a/net/netfilter/core.c b/net/netfilter/core.c
26226index a9c488b..c646ec8 100644
26227--- a/net/netfilter/core.c
26228+++ b/net/netfilter/core.c
26229@@ -20,11 +20,17 @@
26230 #include <linux/proc_fs.h>
26231 #include <linux/mutex.h>
26232 #include <linux/slab.h>
26233+#include <linux/locallock.h>
26234 #include <net/net_namespace.h>
26235 #include <net/sock.h>
26236
26237 #include "nf_internals.h"
26238
26239+#ifdef CONFIG_PREEMPT_RT_BASE
26240+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
26241+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
26242+#endif
26243+
26244 static DEFINE_MUTEX(afinfo_mutex);
26245
26246 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
26247diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
26248index c111bd0..92a2359 100644
26249--- a/net/packet/af_packet.c
26250+++ b/net/packet/af_packet.c
26251@@ -88,6 +88,7 @@
26252 #include <linux/virtio_net.h>
26253 #include <linux/errqueue.h>
26254 #include <linux/net_tstamp.h>
26255+#include <linux/delay.h>
26256
26257 #ifdef CONFIG_INET
26258 #include <net/inet_common.h>
26259@@ -553,7 +554,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
26260 if (BLOCK_NUM_PKTS(pbd)) {
26261 while (atomic_read(&pkc->blk_fill_in_prog)) {
26262 /* Waiting for skb_copy_bits to finish... */
26263- cpu_relax();
26264+ cpu_chill();
26265 }
26266 }
26267
26268@@ -807,7 +808,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
26269 if (!(status & TP_STATUS_BLK_TMO)) {
26270 while (atomic_read(&pkc->blk_fill_in_prog)) {
26271 /* Waiting for skb_copy_bits to finish... */
26272- cpu_relax();
26273+ cpu_chill();
26274 }
26275 }
26276 prb_close_block(pkc, pbd, po, status);
26277diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
26278index e8fdb17..5a44c6e 100644
26279--- a/net/rds/ib_rdma.c
26280+++ b/net/rds/ib_rdma.c
26281@@ -34,6 +34,7 @@
26282 #include <linux/slab.h>
26283 #include <linux/rculist.h>
26284 #include <linux/llist.h>
26285+#include <linux/delay.h>
26286
26287 #include "rds.h"
26288 #include "ib.h"
26289@@ -286,7 +287,7 @@ static inline void wait_clean_list_grace(void)
26290 for_each_online_cpu(cpu) {
26291 flag = &per_cpu(clean_list_grace, cpu);
26292 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
26293- cpu_relax();
26294+ cpu_chill();
26295 }
26296 }
26297
26298diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
26299index f221ddf..5f44009 100755
26300--- a/scripts/mkcompile_h
26301+++ b/scripts/mkcompile_h
26302@@ -4,7 +4,8 @@ TARGET=$1
26303 ARCH=$2
26304 SMP=$3
26305 PREEMPT=$4
26306-CC=$5
26307+RT=$5
26308+CC=$6
26309
26310 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
26311
26312@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
26313 CONFIG_FLAGS=""
26314 if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
26315 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
26316+if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
26317 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
26318
26319 # Truncate to maximum length
26320diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
26321index a20e320..0b0a907 100644
26322--- a/tools/lib/traceevent/Makefile
26323+++ b/tools/lib/traceevent/Makefile
26324@@ -122,7 +122,7 @@ export Q VERBOSE
26325
26326 EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
26327
26328-INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES)
26329+INCLUDES = -I. $(CONFIG_INCLUDES)
26330
26331 # Set compile option CFLAGS if not set elsewhere
26332 CFLAGS ?= -g -Wall
diff --git a/recipes-kernel/linux/linux-xlnx/preempt-rt.cfg b/recipes-kernel/linux/linux-xlnx/preempt-rt.cfg
new file mode 100644
index 00000000..a15c9302
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx/preempt-rt.cfg
@@ -0,0 +1,3 @@
1CONFIG_PREEMPT_RT_FULL=y
2CONFIG_HZ_1000=y
3CONFIG_HZ=1000
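The fragment above turns on full PREEMPT_RT and a 1000 Hz tick to go with the rt patch whose tail is shown further up. How the two are consumed is not visible in these hunks; the sketch below shows one plausible wiring for a recipe such as linux-xlnx-prt_3.8.bb, assuming the layer's kconfig handling merges .cfg fragments listed in SRC_URI. It is an illustration, not the layer's actual recipe text.

# Sketch only -- one plausible way to carry the RT patch and the fragment above;
# the real wiring is defined by the layer's .inc files, not by this sketch.
SRC_URI += " \
    file://patch-3.8.11-rt8-xlnx.patch \
    file://preempt-rt.cfg \
    "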
diff --git a/recipes-kernel/linux/linux-xlnx_3.10.bb b/recipes-kernel/linux/linux-xlnx_3.10.bb
new file mode 100644
index 00000000..6df3aeb1
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx_3.10.bb
@@ -0,0 +1,10 @@
1# See include file for common information
2include linux-xlnx.inc
3
4PR = "r1"
5
6# Kernel version and SRCREV correspond to:
7# xilinx-v14.7 tag
8LINUX_VERSION = "3.10"
9SRCREV = "efc27505715e64526653f35274717c0fc56491e3"
10
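The 3.10 recipe only pins the version and the xilinx-v14.7 SRCREV; selecting it for a build happens outside the recipe. An illustrative machine or local.conf snippet (standard Yocto variables, not part of this commit) would be:

# Illustration only -- typical configuration for picking this kernel provider:
PREFERRED_PROVIDER_virtual/kernel = "linux-xlnx"
PREFERRED_VERSION_linux-xlnx = "3.10%"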
diff --git a/recipes-kernel/linux/linux-xlnx_3.8.bb b/recipes-kernel/linux/linux-xlnx_3.8.bb
new file mode 100644
index 00000000..3e55c1ff
--- /dev/null
+++ b/recipes-kernel/linux/linux-xlnx_3.8.bb
@@ -0,0 +1,13 @@
1KBRANCH = "xlnx_3.8"
2# See include file for common information
3include linux-xlnx.inc
4
5PR = "r1"
6
7# Kernel version and SRCREV correspond to: xlnx_3.8 branch
8LINUX_VERSION = "3.8"
9SRCREV = "f4ff79d44a966ebea6229213816d17eb472b303e"
10
11SRC_URI_append += " \
12 file://libtraceevent-Remove-hard-coded-include-to-usr-local.patch \
13 "
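The 3.8 recipe pins the xlnx_3.8 branch at a fixed SRCREV and appends the traceevent include-path fix. If another layer needed to track a different revision or stack extra patches on top, a bbappend would be the usual route; a minimal sketch follows, in which the patch name and the use of AUTOREV are hypothetical.

# Hypothetical linux-xlnx_3.8.bbappend in another layer -- not part of this commit:
FILESEXTRAPATHS_prepend := "${THISDIR}/${PN}:"
SRCREV = "${AUTOREV}"
SRC_URI += "file://local-board-fix.patch"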
diff --git a/recipes-kernel/linux/linux-yocto-dev.bbappend b/recipes-kernel/linux/linux-yocto-dev.bbappend
new file mode 100644
index 00000000..b9b6b9e3
--- /dev/null
+++ b/recipes-kernel/linux/linux-yocto-dev.bbappend
@@ -0,0 +1,5 @@
1
2require linux-machine-common.inc
3
4COMPATIBLE_MACHINE_zynq = "zynq"
5COMPATIBLE_MACHINE_microblaze = "microblaze"
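COMPATIBLE_MACHINE is a regular expression matched against the machine name, so the two machine-scoped overrides above open the recipe up only for zynq and microblaze builds. Roughly the same effect could be written as a single assignment; the line below is shown only as an illustration of the idiom, it is not taken from this commit.

# Illustration only:
COMPATIBLE_MACHINE = "(zynq|microblaze)"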
diff --git a/recipes-kernel/linux/linux-yocto/ec2eba55f0c0e74dd39aca14dcc597583cf1eb67.patch b/recipes-kernel/linux/linux-yocto/ec2eba55f0c0e74dd39aca14dcc597583cf1eb67.patch
new file mode 100644
index 00000000..de2df302
--- /dev/null
+++ b/recipes-kernel/linux/linux-yocto/ec2eba55f0c0e74dd39aca14dcc597583cf1eb67.patch
@@ -0,0 +1,70 @@
1From ec2eba55f0c0e74dd39aca14dcc597583cf1eb67 Mon Sep 17 00:00:00 2001
2From: Jason Wu <huanyu@xilinx.com>
3Date: Wed, 21 Aug 2013 07:10:32 +0200
4Subject: [PATCH] microblaze: Add linux.bin.ub target
5
6Currently the linux.bin target creates both linux.bin and linux.bin.ub.
7Add linux.bin.ub as separate target to generate linux.bin.ub.
8
9Signed-off-by: Jason Wu <huanyu@xilinx.com>
10Signed-off-by: Michal Simek <michal.simek@xilinx.com>
11Upstream-Status: Backport
12---
13 arch/microblaze/Makefile | 3 ++-
14 arch/microblaze/boot/Makefile | 7 ++++---
15 2 files changed, 6 insertions(+), 4 deletions(-)
16
17diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
18index 0a603d3..40350a3 100644
19--- a/arch/microblaze/Makefile
20+++ b/arch/microblaze/Makefile
21@@ -72,7 +72,7 @@ all: linux.bin
22 archclean:
23 $(Q)$(MAKE) $(clean)=$(boot)
24
25-linux.bin linux.bin.gz: vmlinux
26+linux.bin linux.bin.gz linux.bin.ub: vmlinux
27 $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
28
29 simpleImage.%: vmlinux
30@@ -81,6 +81,7 @@ simpleImage.%: vmlinux
31 define archhelp
32 echo '* linux.bin - Create raw binary'
33 echo ' linux.bin.gz - Create compressed raw binary'
34+ echo ' linux.bin.ub - Create U-Boot wrapped raw binary'
35 echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
36 echo ' - stripped elf with fdt blob'
37 echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob'
38diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
39index 80fe54f..8e211cc 100644
40--- a/arch/microblaze/boot/Makefile
41+++ b/arch/microblaze/boot/Makefile
42@@ -2,12 +2,15 @@
43 # arch/microblaze/boot/Makefile
44 #
45
46-targets := linux.bin linux.bin.gz simpleImage.%
47+targets := linux.bin linux.bin.gz linux.bin.ub simpleImage.%
48
49 OBJCOPYFLAGS := -R .note -R .comment -R .note.gnu.build-id -O binary
50
51 $(obj)/linux.bin: vmlinux FORCE
52 $(call if_changed,objcopy)
53+ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
54+
55+$(obj)/linux.bin.ub: $(obj)/linux.bin FORCE
56 $(call if_changed,uimage)
57 @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
58
59@@ -22,8 +25,6 @@ quiet_cmd_strip = STRIP $@
60 cmd_strip = $(STRIP) -K microblaze_start -K _end -K __log_buf \
61 -K _fdt_start vmlinux -o $@
62
63-UIMAGE_IN = $@
64-UIMAGE_OUT = $@.ub
65 UIMAGE_LOADADDR = $(CONFIG_KERNEL_BASE_ADDR)
66
67 $(obj)/simpleImage.%: vmlinux FORCE
68--
691.8.5.5
70
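The backport above makes linux.bin.ub a first-class make target, so the U-Boot-wrapped microblaze image can be built directly instead of only falling out of the linux.bin target. That is what lets a machine configuration name it as the kernel image type; the setting below is an assumption for illustration, not something added by this commit.

# Illustration only -- ask the kernel build to produce the new target on microblaze:
KERNEL_IMAGETYPE = "linux.bin.ub"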
diff --git a/recipes-kernel/linux/linux-yocto_3.10.bbappend b/recipes-kernel/linux/linux-yocto_3.10.bbappend
new file mode 100644
index 00000000..3601d587
--- /dev/null
+++ b/recipes-kernel/linux/linux-yocto_3.10.bbappend
@@ -0,0 +1,8 @@
1
2require linux-machine-common.inc
3
4COMPATIBLE_MACHINE_zynq = "zynq"
5COMPATIBLE_MACHINE_microblaze = "microblaze"
6
7FILESEXTRAPATHS_prepend := "${THISDIR}/linux-yocto:"
8SRC_URI_append_microblaze += "file://ec2eba55f0c0e74dd39aca14dcc597583cf1eb67.patch"
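Here FILESEXTRAPATHS_prepend makes this layer's linux-yocto/ directory searchable for the patch, and the _microblaze suffix restricts the append to microblaze machines, so zynq builds of linux-yocto 3.10 are left untouched. A hypothetical zynq-only counterpart, shown purely to illustrate the override pattern, would look like:

# Hypothetical example only -- no such patch exists in this commit:
# SRC_URI_append_zynq += "file://some-zynq-only.patch"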
diff --git a/recipes-kernel/linux/linux-yocto_3.14.bbappend b/recipes-kernel/linux/linux-yocto_3.14.bbappend
new file mode 100644
index 00000000..b9b6b9e3
--- /dev/null
+++ b/recipes-kernel/linux/linux-yocto_3.14.bbappend
@@ -0,0 +1,5 @@
1
2require linux-machine-common.inc
3
4COMPATIBLE_MACHINE_zynq = "zynq"
5COMPATIBLE_MACHINE_microblaze = "microblaze"