Revert "qemu: add 34Kf-64tlb fictitious cpu type"

This reverts commit 4470a04943352224955f17e004962f0f9e1c9b0c. It will be replaced by the upstreamed solution, which simply bumps the number of TLB entries to 64 in the original 34Kf CPU model. (From OE-Core rev: 894f1d58d93073d290f35d1090b03717bc7b4dc0) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Victor Kamensky <kamensky@cisco.com> 2020-10-19 15:21:45 -0700
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2020-10-20 11:11:46 +0100
commit: c2adcca4bf12a240a59dcffb2d729a03662c92f7 (patch)
tree: 32af959db887964352b08eebd9045df0b085217b /meta/recipes-devtools
parent: f27fa7ad97a8d4eed2e4e67cb404a67fa873a425 (diff)
download: poky-c2adcca4bf12a240a59dcffb2d729a03662c92f7.tar.gz
2 files changed, 0 insertions, 119 deletions
diff --git a/meta/recipes-devtools/qemu/qemu.inc b/meta/recipes-devtools/qemu/qemu.inc
index 6c0edcb706..bbb9038961 100644
--- a/meta/recipes-devtools/qemu/qemu.inc
+++ b/meta/recipes-devtools/qemu/qemu.inc
@@ -31,7 +31,6 @@ SRC_URI = "https://download.qemu.org/${BPN}-${PV}.tar.xz \
           file://0001-qemu-Do-not-include-file-if-not-exists.patch \
           file://find_datadir.patch \
           file://usb-fix-setup_len-init.patch \
-           file://0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch \
           "
 UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar"
diff --git a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch b/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch
deleted file mode 100644
index b6312e1543..0000000000
--- a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From b3fcc7d96523ad8e3ea28c09d495ef08529d01ce Mon Sep 17 00:00:00 2001
-From: Victor Kamensky <kamensky@cisco.com>
-Date: Wed, 7 Oct 2020 10:19:42 -0700
-Subject: [PATCH] mips: add 34Kf-64tlb fictitious cpu type like 34Kf but with
- 64 TLBs
-In Yocto Project CI runs it was observed that test run
-of 32 bit mips image takes almost twice longer than 64 bit
-mips image with the same logical load and CI execution
-hits timeout.
-See https://bugzilla.yoctoproject.org/show_bug.cgi?id=13992
-Yocto project uses 34Kf cpu type to run 32 bit mips image,
-and MIPS64R2-generic cpu type to run 64 bit mips64 image.
-Upon qemu behavior differences investigation between mips
-and mips64 two prominent observations came up: under
-logically similar load (same definition and configuration
-of user-land image) in case of mips get_physical_address
-function is called almost twice more often, meaning
-twice more memory accesses involved in this case. Also
-number of tlbwr instruction executed (r4k_helper_tlbwr
-qemu function) almost 16 time bigger in mips case than in
-mips64.
-It turns out that 34Kf cpu has 16 TLBs, but in case of
-MIPS64R2-generic it is 64 TLBs. So that explains why
-some many more tlbwr had to be execute by kernel TLB refill
-handler in case of 32 bit misp.
-The idea of the fix is to come up with new 34Kf-64tlb fictitious
-cpu type, that would behave exactly as 34Kf but it would
-contain 64 TLBs to reduce TLB trashing. After all, adding
-more TLBs to soft mmu is easy.
-Experiment with some significant non-trvial load in Yocto
-environment by running do_testimage load shows that 34Kf-64tlb
-cpu performs 40% or so better than original 34Kf cpu wrt test
-execution real time.
-It is not ideal to have cpu type that does not exist in the
-wild but given performance gains it seems to be justified.
-Signed-off-by: Victor Kamensky <kamensky@cisco.com>
---
- target/mips/translate_init.inc.c | 55 ++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 55 insertions(+)
-diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
-index 637caccd89..b73ab48231 100644
--- a/target/mips/translate_init.inc.c
-+++ b/target/mips/translate_init.inc.c
-@@ -297,6 +297,61 @@ const mips_def_t mips_defs[] =
-         .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
-         .mmu_type = MMU_TYPE_R4000,
-     },
-+    /*
-+     * Verbatim copy of "34Kf" cpu, only bumped up number of TLB entries
-+     * from 16 to 64 (see CP0_Config0 value at CP0C1_MMU bits) to improve
-+     * performance by reducing number of TLB refill exceptions and
-+     * eliminating need to run all corresponding TLB refill handling
-+     * instructions.
-+     */
-+    {
-+        .name = "34Kf-64tlb",
-+        .CP0_PRid = 0x00019500,
-+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
-+                       (MMU_TYPE_R4000 << CP0C0_MT),
-+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) |
-+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
-+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
-+                       (1 << CP0C1_CA),
-+        .CP0_Config2 = MIPS_CONFIG2,
-+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_VInt) | (1 << CP0C3_MT) |
-+                       (1 << CP0C3_DSPP),
-+        .CP0_LLAddr_rw_bitmask = 0,
-+        .CP0_LLAddr_shift = 0,
-+        .SYNCI_Step = 32,
-+        .CCRes = 2,
-+        .CP0_Status_rw_bitmask = 0x3778FF1F,
-+        .CP0_TCStatus_rw_bitmask = (0 << CP0TCSt_TCU3) | (0 << CP0TCSt_TCU2) |
-+                    (1 << CP0TCSt_TCU1) | (1 << CP0TCSt_TCU0) |
-+                    (0 << CP0TCSt_TMX) | (1 << CP0TCSt_DT) |
-+                    (1 << CP0TCSt_DA) | (1 << CP0TCSt_A) |
-+                    (0x3 << CP0TCSt_TKSU) | (1 << CP0TCSt_IXMT) |
-+                    (0xff << CP0TCSt_TASID),
-+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
-+                    (1 << FCR0_D) | (1 << FCR0_S) | (0x95 << FCR0_PRID),
-+        .CP1_fcr31 = 0,
-+        .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
-+        .CP0_SRSCtl = (0xf << CP0SRSCtl_HSS),
-+        .CP0_SRSConf0_rw_bitmask = 0x3fffffff,
-+        .CP0_SRSConf0 = (1U << CP0SRSC0_M) | (0x3fe << CP0SRSC0_SRS3) |
-+                    (0x3fe << CP0SRSC0_SRS2) | (0x3fe << CP0SRSC0_SRS1),
-+        .CP0_SRSConf1_rw_bitmask = 0x3fffffff,
-+        .CP0_SRSConf1 = (1U << CP0SRSC1_M) | (0x3fe << CP0SRSC1_SRS6) |
-+                    (0x3fe << CP0SRSC1_SRS5) | (0x3fe << CP0SRSC1_SRS4),
-+        .CP0_SRSConf2_rw_bitmask = 0x3fffffff,
-+        .CP0_SRSConf2 = (1U << CP0SRSC2_M) | (0x3fe << CP0SRSC2_SRS9) |
-+                    (0x3fe << CP0SRSC2_SRS8) | (0x3fe << CP0SRSC2_SRS7),
-+        .CP0_SRSConf3_rw_bitmask = 0x3fffffff,
-+        .CP0_SRSConf3 = (1U << CP0SRSC3_M) | (0x3fe << CP0SRSC3_SRS12) |
-+                    (0x3fe << CP0SRSC3_SRS11) | (0x3fe << CP0SRSC3_SRS10),
-+        .CP0_SRSConf4_rw_bitmask = 0x3fffffff,
-+        .CP0_SRSConf4 = (0x3fe << CP0SRSC4_SRS15) |
-+                    (0x3fe << CP0SRSC4_SRS14) | (0x3fe << CP0SRSC4_SRS13),
-+        .SEGBITS = 32,
-+        .PABITS = 32,
-+        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
-+        .mmu_type = MMU_TYPE_R4000,
-+    },
-     {
-         .name = "74Kf",
-         .CP0_PRid = 0x00019700,
-- 
-2.14.5
author	Victor Kamensky <kamensky@cisco.com>	2020-10-19 15:21:45 -0700
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2020-10-20 11:11:46 +0100
commit	c2adcca4bf12a240a59dcffb2d729a03662c92f7 (patch)
tree	32af959db887964352b08eebd9045df0b085217b /meta/recipes-devtools
parent	f27fa7ad97a8d4eed2e4e67cb404a67fa873a425 (diff)
download	poky-c2adcca4bf12a240a59dcffb2d729a03662c92f7.tar.gz

diff --git a/meta/recipes-devtools/qemu/qemu.inc b/meta/recipes-devtools/qemu/qemu.inc index 6c0edcb706..bbb9038961 100644 --- a/meta/recipes-devtools/qemu/qemu.inc +++ b/meta/recipes-devtools/qemu/qemu.inc
@@ -31,7 +31,6 @@ SRC_URI = "https://download.qemu.org/${BPN}-${PV}.tar.xz \
31	file://0001-qemu-Do-not-include-file-if-not-exists.patch \	31	file://0001-qemu-Do-not-include-file-if-not-exists.patch \
32	file://find_datadir.patch \	32	file://find_datadir.patch \
33	file://usb-fix-setup_len-init.patch \	33	file://usb-fix-setup_len-init.patch \
34	file://0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch \
35	"	34	"
36	UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar"	35	UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar"
37		36


diff --git a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch b/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch deleted file mode 100644 index b6312e1543..0000000000 --- a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch +++ /dev/null
@@ -1,118 +0,0 @@
1	From b3fcc7d96523ad8e3ea28c09d495ef08529d01ce Mon Sep 17 00:00:00 2001
2	From: Victor Kamensky <kamensky@cisco.com>
3	Date: Wed, 7 Oct 2020 10:19:42 -0700
4	Subject: [PATCH] mips: add 34Kf-64tlb fictitious cpu type like 34Kf but with
5	64 TLBs
6
7	In Yocto Project CI runs it was observed that test run
8	of 32 bit mips image takes almost twice longer than 64 bit
9	mips image with the same logical load and CI execution
10	hits timeout.
11
12	See https://bugzilla.yoctoproject.org/show_bug.cgi?id=13992
13
14	Yocto project uses 34Kf cpu type to run 32 bit mips image,
15	and MIPS64R2-generic cpu type to run 64 bit mips64 image.
16
17	Upon qemu behavior differences investigation between mips
18	and mips64 two prominent observations came up: under
19	logically similar load (same definition and configuration
20	of user-land image) in case of mips get_physical_address
21	function is called almost twice more often, meaning
22	twice more memory accesses involved in this case. Also
23	number of tlbwr instruction executed (r4k_helper_tlbwr
24	qemu function) almost 16 time bigger in mips case than in
25	mips64.
26
27	It turns out that 34Kf cpu has 16 TLBs, but in case of
28	MIPS64R2-generic it is 64 TLBs. So that explains why
29	some many more tlbwr had to be execute by kernel TLB refill
30	handler in case of 32 bit misp.
31
32	The idea of the fix is to come up with new 34Kf-64tlb fictitious
33	cpu type, that would behave exactly as 34Kf but it would
34	contain 64 TLBs to reduce TLB trashing. After all, adding
35	more TLBs to soft mmu is easy.
36
37	Experiment with some significant non-trvial load in Yocto
38	environment by running do_testimage load shows that 34Kf-64tlb
39	cpu performs 40% or so better than original 34Kf cpu wrt test
40	execution real time.
41
42	It is not ideal to have cpu type that does not exist in the
43	wild but given performance gains it seems to be justified.
44
45	Signed-off-by: Victor Kamensky <kamensky@cisco.com>
46	---
47	target/mips/translate_init.inc.c \| 55 ++++++++++++++++++++++++++++++++++++++++
48	1 file changed, 55 insertions(+)
49
50	diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
51	index 637caccd89..b73ab48231 100644
52	--- a/target/mips/translate_init.inc.c
53	+++ b/target/mips/translate_init.inc.c
54	@@ -297,6 +297,61 @@ const mips_def_t mips_defs[] =
55	.insn_flags = CPU_MIPS32R2 \| ASE_MIPS16 \| ASE_DSP \| ASE_MT,
56	.mmu_type = MMU_TYPE_R4000,
57	},
58	+ /*
59	+ * Verbatim copy of "34Kf" cpu, only bumped up number of TLB entries
60	+ * from 16 to 64 (see CP0_Config0 value at CP0C1_MMU bits) to improve
61	+ * performance by reducing number of TLB refill exceptions and
62	+ * eliminating need to run all corresponding TLB refill handling
63	+ * instructions.
64	+ */
65	+ {
66	+ .name = "34Kf-64tlb",
67	+ .CP0_PRid = 0x00019500,
68	+ .CP0_Config0 = MIPS_CONFIG0 \| (0x1 << CP0C0_AR) \|
69	+ (MMU_TYPE_R4000 << CP0C0_MT),
70	+ .CP0_Config1 = MIPS_CONFIG1 \| (1 << CP0C1_FP) \| (63 << CP0C1_MMU) \|
71	+ (0 << CP0C1_IS) \| (3 << CP0C1_IL) \| (1 << CP0C1_IA) \|
72	+ (0 << CP0C1_DS) \| (3 << CP0C1_DL) \| (1 << CP0C1_DA) \|
73	+ (1 << CP0C1_CA),
74	+ .CP0_Config2 = MIPS_CONFIG2,
75	+ .CP0_Config3 = MIPS_CONFIG3 \| (1 << CP0C3_VInt) \| (1 << CP0C3_MT) \|
76	+ (1 << CP0C3_DSPP),
77	+ .CP0_LLAddr_rw_bitmask = 0,
78	+ .CP0_LLAddr_shift = 0,
79	+ .SYNCI_Step = 32,
80	+ .CCRes = 2,
81	+ .CP0_Status_rw_bitmask = 0x3778FF1F,
82	+ .CP0_TCStatus_rw_bitmask = (0 << CP0TCSt_TCU3) \| (0 << CP0TCSt_TCU2) \|
83	+ (1 << CP0TCSt_TCU1) \| (1 << CP0TCSt_TCU0) \|
84	+ (0 << CP0TCSt_TMX) \| (1 << CP0TCSt_DT) \|
85	+ (1 << CP0TCSt_DA) \| (1 << CP0TCSt_A) \|
86	+ (0x3 << CP0TCSt_TKSU) \| (1 << CP0TCSt_IXMT) \|
87	+ (0xff << CP0TCSt_TASID),
88	+ .CP1_fcr0 = (1 << FCR0_F64) \| (1 << FCR0_L) \| (1 << FCR0_W) \|
89	+ (1 << FCR0_D) \| (1 << FCR0_S) \| (0x95 << FCR0_PRID),
90	+ .CP1_fcr31 = 0,
91	+ .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
92	+ .CP0_SRSCtl = (0xf << CP0SRSCtl_HSS),
93	+ .CP0_SRSConf0_rw_bitmask = 0x3fffffff,
94	+ .CP0_SRSConf0 = (1U << CP0SRSC0_M) \| (0x3fe << CP0SRSC0_SRS3) \|
95	+ (0x3fe << CP0SRSC0_SRS2) \| (0x3fe << CP0SRSC0_SRS1),
96	+ .CP0_SRSConf1_rw_bitmask = 0x3fffffff,
97	+ .CP0_SRSConf1 = (1U << CP0SRSC1_M) \| (0x3fe << CP0SRSC1_SRS6) \|
98	+ (0x3fe << CP0SRSC1_SRS5) \| (0x3fe << CP0SRSC1_SRS4),
99	+ .CP0_SRSConf2_rw_bitmask = 0x3fffffff,
100	+ .CP0_SRSConf2 = (1U << CP0SRSC2_M) \| (0x3fe << CP0SRSC2_SRS9) \|
101	+ (0x3fe << CP0SRSC2_SRS8) \| (0x3fe << CP0SRSC2_SRS7),
102	+ .CP0_SRSConf3_rw_bitmask = 0x3fffffff,
103	+ .CP0_SRSConf3 = (1U << CP0SRSC3_M) \| (0x3fe << CP0SRSC3_SRS12) \|
104	+ (0x3fe << CP0SRSC3_SRS11) \| (0x3fe << CP0SRSC3_SRS10),
105	+ .CP0_SRSConf4_rw_bitmask = 0x3fffffff,
106	+ .CP0_SRSConf4 = (0x3fe << CP0SRSC4_SRS15) \|
107	+ (0x3fe << CP0SRSC4_SRS14) \| (0x3fe << CP0SRSC4_SRS13),
108	+ .SEGBITS = 32,
109	+ .PABITS = 32,
110	+ .insn_flags = CPU_MIPS32R2 \| ASE_MIPS16 \| ASE_DSP \| ASE_MT,
111	+ .mmu_type = MMU_TYPE_R4000,
112	+ },
113	{
114	.name = "74Kf",
115	.CP0_PRid = 0x00019700,
116	--
117	2.14.5
118