diff options
| -rw-r--r-- | meta/recipes-devtools/qemu/qemu.inc | 1 | ||||
| -rw-r--r-- | meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch | 118 |
2 files changed, 0 insertions, 119 deletions
diff --git a/meta/recipes-devtools/qemu/qemu.inc b/meta/recipes-devtools/qemu/qemu.inc index 6c0edcb706..bbb9038961 100644 --- a/meta/recipes-devtools/qemu/qemu.inc +++ b/meta/recipes-devtools/qemu/qemu.inc | |||
| @@ -31,7 +31,6 @@ SRC_URI = "https://download.qemu.org/${BPN}-${PV}.tar.xz \ | |||
| 31 | file://0001-qemu-Do-not-include-file-if-not-exists.patch \ | 31 | file://0001-qemu-Do-not-include-file-if-not-exists.patch \ |
| 32 | file://find_datadir.patch \ | 32 | file://find_datadir.patch \ |
| 33 | file://usb-fix-setup_len-init.patch \ | 33 | file://usb-fix-setup_len-init.patch \ |
| 34 | file://0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch \ | ||
| 35 | " | 34 | " |
| 36 | UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar" | 35 | UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar" |
| 37 | 36 | ||
diff --git a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch b/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch deleted file mode 100644 index b6312e1543..0000000000 --- a/meta/recipes-devtools/qemu/qemu/0001-mips-add-34Kf-64tlb-fictitious-cpu-type-like-34Kf-bu.patch +++ /dev/null | |||
| @@ -1,118 +0,0 @@ | |||
| 1 | From b3fcc7d96523ad8e3ea28c09d495ef08529d01ce Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Victor Kamensky <kamensky@cisco.com> | ||
| 3 | Date: Wed, 7 Oct 2020 10:19:42 -0700 | ||
| 4 | Subject: [PATCH] mips: add 34Kf-64tlb fictitious cpu type like 34Kf but with | ||
| 5 | 64 TLBs | ||
| 6 | |||
| 7 | In Yocto Project CI runs it was observed that test run | ||
| 8 | of 32 bit mips image takes almost twice longer than 64 bit | ||
| 9 | mips image with the same logical load and CI execution | ||
| 10 | hits timeout. | ||
| 11 | |||
| 12 | See https://bugzilla.yoctoproject.org/show_bug.cgi?id=13992 | ||
| 13 | |||
| 14 | Yocto project uses 34Kf cpu type to run 32 bit mips image, | ||
| 15 | and MIPS64R2-generic cpu type to run 64 bit mips64 image. | ||
| 16 | |||
| 17 | Upon qemu behavior differences investigation between mips | ||
| 18 | and mips64 two prominent observations came up: under | ||
| 19 | logically similar load (same definition and configuration | ||
| 20 | of user-land image) in case of mips get_physical_address | ||
| 21 | function is called almost twice as often, meaning | ||
| 22 | twice as many memory accesses are involved in this case. Also the | ||
| 23 | number of tlbwr instructions executed (r4k_helper_tlbwr | ||
| 24 | qemu function) is almost 16 times bigger in mips case than in | ||
| 25 | mips64. | ||
| 26 | |||
| 27 | It turns out that 34Kf cpu has 16 TLBs, but in case of | ||
| 28 | MIPS64R2-generic it is 64 TLBs. So that explains why | ||
| 29 | so many more tlbwr had to be executed by kernel TLB refill | ||
| 30 | handler in case of 32 bit mips. | ||
| 31 | |||
| 32 | The idea of the fix is to come up with new 34Kf-64tlb fictitious | ||
| 33 | cpu type, that would behave exactly as 34Kf but it would | ||
| 34 | contain 64 TLBs to reduce TLB thrashing. After all, adding | ||
| 35 | more TLBs to soft mmu is easy. | ||
| 36 | |||
| 37 | Experiment with some significant non-trivial load in Yocto | ||
| 38 | environment by running do_testimage load shows that 34Kf-64tlb | ||
| 39 | cpu performs 40% or so better than original 34Kf cpu wrt test | ||
| 40 | execution real time. | ||
| 41 | |||
| 42 | It is not ideal to have cpu type that does not exist in the | ||
| 43 | wild but given performance gains it seems to be justified. | ||
| 44 | |||
| 45 | Signed-off-by: Victor Kamensky <kamensky@cisco.com> | ||
| 46 | --- | ||
| 47 | target/mips/translate_init.inc.c | 55 ++++++++++++++++++++++++++++++++++++++++ | ||
| 48 | 1 file changed, 55 insertions(+) | ||
| 49 | |||
| 50 | diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c | ||
| 51 | index 637caccd89..b73ab48231 100644 | ||
| 52 | --- a/target/mips/translate_init.inc.c | ||
| 53 | +++ b/target/mips/translate_init.inc.c | ||
| 54 | @@ -297,6 +297,61 @@ const mips_def_t mips_defs[] = | ||
| 55 | .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT, | ||
| 56 | .mmu_type = MMU_TYPE_R4000, | ||
| 57 | }, | ||
| 58 | + /* | ||
| 59 | + * Verbatim copy of "34Kf" cpu, only bumped up number of TLB entries | ||
| 60 | + * from 16 to 64 (see CP0_Config1 value at CP0C1_MMU bits) to improve | ||
| 61 | + * performance by reducing number of TLB refill exceptions and | ||
| 62 | + * eliminating need to run all corresponding TLB refill handling | ||
| 63 | + * instructions. | ||
| 64 | + */ | ||
| 65 | + { | ||
| 66 | + .name = "34Kf-64tlb", | ||
| 67 | + .CP0_PRid = 0x00019500, | ||
| 68 | + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | | ||
| 69 | + (MMU_TYPE_R4000 << CP0C0_MT), | ||
| 70 | + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | | ||
| 71 | + (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) | | ||
| 72 | + (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) | | ||
| 73 | + (1 << CP0C1_CA), | ||
| 74 | + .CP0_Config2 = MIPS_CONFIG2, | ||
| 75 | + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_VInt) | (1 << CP0C3_MT) | | ||
| 76 | + (1 << CP0C3_DSPP), | ||
| 77 | + .CP0_LLAddr_rw_bitmask = 0, | ||
| 78 | + .CP0_LLAddr_shift = 0, | ||
| 79 | + .SYNCI_Step = 32, | ||
| 80 | + .CCRes = 2, | ||
| 81 | + .CP0_Status_rw_bitmask = 0x3778FF1F, | ||
| 82 | + .CP0_TCStatus_rw_bitmask = (0 << CP0TCSt_TCU3) | (0 << CP0TCSt_TCU2) | | ||
| 83 | + (1 << CP0TCSt_TCU1) | (1 << CP0TCSt_TCU0) | | ||
| 84 | + (0 << CP0TCSt_TMX) | (1 << CP0TCSt_DT) | | ||
| 85 | + (1 << CP0TCSt_DA) | (1 << CP0TCSt_A) | | ||
| 86 | + (0x3 << CP0TCSt_TKSU) | (1 << CP0TCSt_IXMT) | | ||
| 87 | + (0xff << CP0TCSt_TASID), | ||
| 88 | + .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) | | ||
| 89 | + (1 << FCR0_D) | (1 << FCR0_S) | (0x95 << FCR0_PRID), | ||
| 90 | + .CP1_fcr31 = 0, | ||
| 91 | + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, | ||
| 92 | + .CP0_SRSCtl = (0xf << CP0SRSCtl_HSS), | ||
| 93 | + .CP0_SRSConf0_rw_bitmask = 0x3fffffff, | ||
| 94 | + .CP0_SRSConf0 = (1U << CP0SRSC0_M) | (0x3fe << CP0SRSC0_SRS3) | | ||
| 95 | + (0x3fe << CP0SRSC0_SRS2) | (0x3fe << CP0SRSC0_SRS1), | ||
| 96 | + .CP0_SRSConf1_rw_bitmask = 0x3fffffff, | ||
| 97 | + .CP0_SRSConf1 = (1U << CP0SRSC1_M) | (0x3fe << CP0SRSC1_SRS6) | | ||
| 98 | + (0x3fe << CP0SRSC1_SRS5) | (0x3fe << CP0SRSC1_SRS4), | ||
| 99 | + .CP0_SRSConf2_rw_bitmask = 0x3fffffff, | ||
| 100 | + .CP0_SRSConf2 = (1U << CP0SRSC2_M) | (0x3fe << CP0SRSC2_SRS9) | | ||
| 101 | + (0x3fe << CP0SRSC2_SRS8) | (0x3fe << CP0SRSC2_SRS7), | ||
| 102 | + .CP0_SRSConf3_rw_bitmask = 0x3fffffff, | ||
| 103 | + .CP0_SRSConf3 = (1U << CP0SRSC3_M) | (0x3fe << CP0SRSC3_SRS12) | | ||
| 104 | + (0x3fe << CP0SRSC3_SRS11) | (0x3fe << CP0SRSC3_SRS10), | ||
| 105 | + .CP0_SRSConf4_rw_bitmask = 0x3fffffff, | ||
| 106 | + .CP0_SRSConf4 = (0x3fe << CP0SRSC4_SRS15) | | ||
| 107 | + (0x3fe << CP0SRSC4_SRS14) | (0x3fe << CP0SRSC4_SRS13), | ||
| 108 | + .SEGBITS = 32, | ||
| 109 | + .PABITS = 32, | ||
| 110 | + .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT, | ||
| 111 | + .mmu_type = MMU_TYPE_R4000, | ||
| 112 | + }, | ||
| 113 | { | ||
| 114 | .name = "74Kf", | ||
| 115 | .CP0_PRid = 0x00019700, | ||
| 116 | -- | ||
| 117 | 2.14.5 | ||
| 118 | |||
