summary | refs | log | tree | commit | diff | stats
path: root/meta
diff options
context:
space:
mode:
author: Deepthi Hemraj <Deepthi.Hemraj@windriver.com> 2024-10-01 04:48:55 -0700
committer: Steve Sakoman <steve@sakoman.com> 2024-10-07 05:43:22 -0700
commit: 614435f3111f2c0bd699e281f95d82fe65ca85ca (patch)
tree: dca0d17b60dd9a1a2a1139612467e3d6e67e8c23 /meta
parent: 8c4a3b7c651b0d9dccfc3efc8b8f0dbea9dd552a (diff)
download: poky-614435f3111f2c0bd699e281f95d82fe65ca85ca.tar.gz
gcc: upgrade to v11.5
The gcc stable version is upgraded from v11.4 to v11.5. Dropped CVE-2023-4039.patch and 0001-aarch64-Update-Neoverse-N2-core-defini.patch because they have been merged into gcc-11.5 by upstream commits 75c37e0314 and 50d9db203bc. For changes in v11.5 see: https://gcc.gnu.org/gcc-11/changes.html The bug fix list for v11.5 is at: https://gcc.gnu.org/bugzilla/buglist.cgi?bug_status=RESOLVED&list_id=444046&resolution=FIXED&target_milestone=11.5 A total of 164 bugs are fixed in this release; below is the list of bugs fixed excluding the regression fixes. ID Product Comp Assignee▲ Summary 112672 gcc target ubizjak [14 Regression] wrong code with __builtin_parityl() at -O and above on x86_64 111736 gcc sanitize unassigned Address sanitizer is not compatible with named address spaces 80899 gcc ipa hubicka [11/12/13/14 Regression] Devirtualization causes incorrect code generation with placement new in some cases 105301 gcc c++ iains [11 Regression] ICE: tree check: expected tree that contains 'decl minimal' structure, have 'overload' in coro_promise_type_found_p, at cp/coroutines.cc:516 110027 gcc middle-e jakub [11 regression] Stack objects with extended alignments (vectors etc) misaligned on detect_stack_use_after_return 110079 gcc rtl-opti jakub [11 Regression] ICE with -freorder-blocks-and-partition and inline-asm goto 110731 gcc tree-opt jakub [11/12 Regression] Wrong-code because of wide-int division since r5-424 111015 gcc tree-opt jakub [11/12/13/14 Regression] __int128 bitfields optimized incorrectly to the 64 bit operations 112727 gcc sanitize jakub [11/12/13 Regression] UBSAN creates GIMPLE path with uninitialized variable 113674 gcc c++ jakub [11 Regression] [[____attr____]] causes internal compiler error: in decl_attributes, at attribs.cc:776 114310 gcc target jakub [11 Regression] [aarch64] __sync_val_compare_and_swap fails on __int128_t with newval = 0 114493 gcc c jakub [11 Regression] internal compiler error: in fld_incomplete_type_of with may_alias 114566 gcc 
tree-opt jakub [11 Regression] Misaligned vmovaps when compiling with stack-protector-strong for znver4 114634 gcc c++ jakub [11 Regression] Crash Issue Encountered in GCC Compilation of Template Code with Aligned Attribute since r9-1745 114691 gcc c++ jakub [11 Regression] Bogus ignoring loop annotation warning 114825 gcc fortran Jakub [11 Regression] Compiler error using gfortran and OpenMP since r5-1190 114876 gcc tree-opt jakub [11 Regression] -fprintf-return-value mishandles %lc with a '\0' argument. 114956 gcc sanitize jakub [11 Regression] Segmentation fault with -fsanitize=address -fsanitize=null -O2 when attribute no_sanitize_address is enabled since r9-5742 106890 gcc c++ jason [11 Regression] virtual inheritance triggers compiler error when instatiating derived class with in-class initialization since r8-2709-g12659e10c7820071 111529 gcc c++ jason [11/12/13 Regression] ICE on bool conversion in an unrolled loop condition inside template lambda nested in another template scope 113598 gcc c++ jason [11/12/13 Regression] GCC internal compiler error since r0-124275 114561 gcc c++ jason [11/12 Regression] Comma operator with forwarding reference to pointer raises invalid lvalue required error since r10-7410 114562 gcc c++ jason [11/12 Regression] ICE when trying to bind rvalue reference to lvalue with comma operator and forwarding reference to pointer since r10-7410 115565 gcc rtl-opti macro [11/12/13/14/15 Regression] CSE: Comparison incorrectly evaluated as constant causing optimization to produce wrong code 109876 gcc c++ mpolacek [11/12 Regression] initializer_list not usable in constant expressions in a template 110106 gcc c++ mpolacek [11/12 Regression] ICE on noexcept(noexcept(...)) with optional 89224 gcc c++ pinskia [11/12/13/14/15 Regression] subscript of const vector has the wrong type 110386 gcc tree-opt pinskia [11/12 Regression] ICE with ABSU in backprop 111331 gcc tree-opt pinskia [11/12 Regression] Wrong code at -O1 on x86_64-linux-gnu since 
108120 gcc target rearnsha [11/12 Regression] ICE: in extract_insn, at recog.cc:2791 (on ARM with -mfpu=neon -freciprocal-math -O3) 95048 gcc libstdc+ redi [11 Regression] wstring-constructor of std::filesystem::path throws for non-ASCII characters 104606 gcc libstdc+ redi [11 Regression] comparison operator resolution with std::optional and -std=c++20 90348 gcc middle-e rguenth [11 Regression] Partition of char arrays is incorrect in some cases 96881 gcc tree-opt rguenth [11 Regression] Clobbers on NULL vs. DCE since r8-1519 97990 gcc c++ rguenth [11 Regression] ICE: ‘verify_type’ failed with vector types and non-PODs since r6-5222-gba6a6a1d44c17f25 103006 gcc middle-e rguenth [12/13/14/15 Regression] wrong code at -O1 or -O2 on x86_64-linux-gnu by r7-7101 110176 gcc tree-opt rguenth [11 Regression] wrong code at -Os and above on x86_64-linux-gnu since r11-2446 110295 gcc c++ rguenth [11 Regression] ICE in dwarf2out_finish with local class with inherited operator delete in a templated function and -g 110298 gcc tree-opt rguenth [11 Regression] ICE at -Os on x86_64-linux-gnu since r10-840 111039 gcc tree-opt rguenth [11 Regression] Unable to coalesce ssa_names 111080 gcc debug rguenth [11 Regression] restrict qualifier causes extra debug info to happen 111472 gcc tree-opt rguenth [11 Regression] Wrong code at -Os on x86_64-linux-gnu since r11-4563-gd0d8b5d836 111614 gcc tree-opt rguenth [11 Regression] ICE at -O2: verify_gimple failed since r14-2282-gf703d2fd3f0 111764 gcc tree-opt rguenth [11 Regression] Wrong code at -O3 on x86_64-linux-gnu 111818 gcc middle-e rguenth [11 Regression] ICE with __builtin_memcpy with volatile and constants 111917 gcc tree-opt rguenth [11 Regression] ICE in as_a, at is-a.h:255 since GCC-7 112495 gcc tree-opt rguenth [11 Regression] ICE: verify_gimple failed (after vectorizer) with named address space (__seg_gs ) 112505 gcc tree-opt rguenth [11 Regression] internal compiler error: in build_vector_from_val, at tree.cc:2104 since 
r10-4076 112718 gcc debug rguenth [11 Regression] ICE: in add_dwarf_attr, at dwarf2out.cc:4501 with -g -fdebug-types-section -flto -ffat-lto-objects 112793 gcc tree-opt rguenth [11 regression] ICE when building stellarium (internal compiler error: in vect_schedule_slp_node, at tree-vect-slp.cc:9062) 114027 gcc tree-opt rguenth [11 Regression] miscompile at `-O3 -fno-vect-cost-model -msse4.2` 114734 gcc target rguenth [11 regression] RISC-V rv64gcv_zvl256b miscompile with -flto -O3 -mrvv-vector-bits=zvl since r8-6047-g65dd1346027bb5 108086 gcc rtl-opti rsandifo [11 Regression] internal compiler error: in set_accesses, at rtl-ssa/internals.inl:449 113281 gcc tree-opt rsandifo [11 Regression] Latent wrong code due to vectorization of shift reduction and missing promotions since r9-1590 113552 gcc tree-opt tnfchris [11/12/13 Regression] vectorizer generates calls to vector math routines with 1 simd lane. 29256 gcc target unassigned [11/12/13/14 regression] loop performance regression 82446 gcc tree-opt unassigned [11/12/13/14 Regression] Missed equalities in dr_group_sort_cmp 93631 gcc c unassigned [11/12/13/14 Regression] ICE on an invalid strcmp call in gimple_call_arg, at gimple.h:3258 93930 gcc target unassigned [11/12/13/14 Regression] Unnecessary broadcast instructions for AVX512 94335 gcc tree-opt unassigned [11/12/13/14 Regression] False positive -Wstringop-overflow warning with -O2 97140 gcc target unassigned [11/12/13/14 Regression] ICE in error: unable to generate reloads for since r10-400-gecfdb16c54ad06ac 100623 gcc target unassigned [11 Regression] wrong code with -Os -fno-dce -fno-defer-pop -fno-forward-propagate -flive-range-shrinkage -fno-rerun-cse-after-loop -mno-push-args since r10-7515-g2c0fa3ecf70d199a 100667 gcc libstdc+ unassigned [11/12 Regression] std::tuple<A&&> cannot be constructed from A&&, if A not defined (only forward declared) 103497 gcc c++ unassigned [11/12/13/14 Regression] ICE when decltype(auto)... 
as parameters 105034 gcc target unassigned [11/12/13/14 regression]Suboptimal codegen for min/max with -Os 107057 gcc rtl-opti unassigned [11/12 Regression] ICE in extract_constrain_insn, at recog.cc:2692 109800 gcc target acoplan [11 Regression] arm: ICE (segfault) loading double with -mpure-code -mbig-endian 110288 gcc fortran anlauf [11/12/13/14] Regression: segfault in findloc with allocatable array of allocatable characters 110585 gcc fortran anlauf ICE in gfc_compare_expr for findloc with complex literal array 110658 gcc fortran anlauf MINVAL/MAXVAL and deferred-length character arrays 115611 gcc target avieira mve: vsetq_lane for 64-bits has wrong codegen when setting lane 1 113893 gcc ada ebotcazou finalization of object allocated by anonymous access type designating local type 111050 gcc libstdc+ fdumont [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 110624 gcc target iains Xcode 15 ld warns about -macosx_version_min 114171 gcc d ibuclaw [13/14 Regression] gdc -O2 -mavx generates misaligned vmovdqa instruction 108789 gcc middle-e jakub __builtin_(add|mul|sub)_overflow methods generate duplicate operations if both operands are const which in turn causes wrong code due to overlapping arguments 110115 gcc middle-e jakub [11 Regression] Wrong code at -O1 on x86_64-linux-gnu 110914 gcc tree-opt jakub [11/12/13/14 Regression] Optimization eliminating necessary assignment before 0-byte memcpy since r10-5451 111422 gcc middle-e jakub Wrong code at -O3 on x86_64-linux-gnu 112816 gcc target jakub [11/12 Regression] ICE unrecognizable_insn with __builtin_signbit and returning struct with int[4] 113122 gcc target jakub Assembler messages: Error: operand type mismatch for `movabs' / bad expression / invalid use of register with -fprofile -mcmodel=large -masm=intel 113192 gcc libgomp jakub [11 Regression] ERROR: couldn't execute "../../../gcc/libgomp/testsuite/flock": no such file or directory 113262 gcc c jakub [11 Regression] ICE when using 
[[gnu::copy("")]] attribute 114533 gcc libquadm jakub libquadmath: printf: fix misaligned access on args 114537 gcc c++ jakub bit_cast does not work NSDMI of bitfields 114572 gcc c++ jakub [OpenMP] "internal compiler error: in assign_temp" with assignment operator and lastprivate clause 115172 gcc sanitize jakub Invalid -fsanitize=bool sanitization of variable from named address space 115440 gcc driver jakub unrecognized command-line option '--c++17'; did you mean '--stdc++17'? 110422 gcc tree-opt jamborm asm goto vs SRA 92145 gcc c++ jason -Wdeprecated-copy false-positive when inheriting base assignment operators 92407 gcc c++ jason Destruction of objects returned from functions skipped by goto 103185 gcc c++ jason [11/12/13 Regression] ind[arr] is rejected when arr is an array prvalue 106310 gcc c++ jason [11 Regression] lookup after this-> seems wrong for dependent lookup since r12-6754-g30f2c22def739211 111357 gcc c++ jason [11/12/13/14 Regression] __integer_pack fails to work with values of dependent type convertible to integers in noexcept context 114130 gcc target kito [11 Regression] RISC-V: `__atomic_compare_exchange` does not use sign-extended value for RV64 113250 gcc libstdc+ kmatsui std::filesystem::equivalent("", "/") should throw 115457 gcc target ktkachov AArch64 should define __ARM_FEATURE_BF16 115475 gcc target ktkachov AArch64 should define __ARM_FEATURE_SVE_BF16 when appropriate 88309 gcc target linkw [11/12/13/14 Regression] ICE: Floating point exception (in is_miss_rate_acceptable), target assigning alignent of 4 bits(!) to vector 104259 gcc libstdc+ marxin libstdc++ fails for epiphany-elf 109822 gcc libstdc+ mkretz Converting std::experimental::simd masks yields an error 59465 gcc c++ mpolacek [11/12/13 Regression] g++ allows direct-initialization of an array of class type from another array in a mem-initializer 100557 gcc c++ mpolacek [11/12/13/14 Regression] Internal compiler error: Error reporting routines re-entered. 
115642 gcc c mpolacek [11/12/13/14/15 Regression] internal compiler error: tree check: expected class 'type', have 'exceptional' (error_mark) in c_expr_sizeof_expr 95351 gcc middle-e pinskia [11/12 Regression] Comparison with NAN optimizes incorrectly with -ffast-math disabled 111699 gcc middle-e pinskia [11/12/13 Regression] ICE: SIGSEGV: infinite recursion in fold_build3_loc/fold_ternary_loc/generic_simplify_VEC_COND_EXPR 109761 gcc c++ ppalka [11/12 Regression] Nested class destructor's noexcept specification incorrectly considered as too loose compared to the outer class 111485 gcc c++ ppalka [11/12 Regression] Constraint mismatch on template template parameter 113175 gcc testsuit ppalka [11/12/13/14 Regression] testsuite/std/ranges/iota/max_size_type.cc 5x times slower 111407 gcc tree-opt qinzhao [11/12/13 Regression] ICE: SSA corruption due to widening_mul opt on conflict across an abnormal edge 99327 gcc libstdc+ redi ENOTSUP macro does not exist on djgpp crt 104161 gcc libstdc+ red Potential Security Vulnerability: remove_all and symbolic link 105178 gcc libstdc+ redi [11 Regression] g++ incorrectly reports invalid use of incomplete type 108178 gcc libstdc+ redi Filesystem::copy_file can't copy from /proc on Linux machines 112491 gcc libstdc+ redi std::deque<T,Allocator>::size xmethod output is wrong 114147 gcc libstdc+ redi [11 Regression] tuple allocator-extended constructor requires non-explicit default constructor 114401 gcc libstdc+ redi libstdc++ allocator destructor omitted when reinserting node_handle into tree- and hashtable-based containers 96109 gcc testsuit rguenth [11 Regression] gcc.dg/vect/slp-47.c etc. FAIL 110182 gcc tree-opt rguenth [11 Regression] Vector(2) cast from double to float and back and subtraction seems to produce incorrect results 110200 gcc middle-e rguenth genmatch generating questionable code with convert and ! 98237 gcc ipa ro gcc-dg-lto-modref-3-01.exe etc. 
FAIL when LTO plugin is not enabled 97696 gcc sanitize rsandifo ICE since ASAN_MARK does not handle poly_int sized varibales 100303 gcc debug rsandifo [11 Regression] -fcompare-debug failure (length) with -O -fno-dce -ftracer 111340 gcc target ubizjak gcc.dg/bitint-12.c fails on x86_64-apple-darwin or fails on x86_64-linux-gnu with -fPIE 115297 gcc rtl-opti ubizjak [14/15 regression] alpha: ICE in simplify_subreg, at simplify-rtx.cc:7554 with -O1 115836 gcc middle-e ubizjak ICE when building Firefox with-march=pentium-mmx -mtune=pentium-m 91085 gcc other unassigned [11 only] fixincludes breaks <bits/statx.h> 103183 gcc c++ unassigned [11/12/13/14 Regression] ind[arr] produces an lvalue when arr is an array xvalue 105417 gcc libstdc+ unassigned [11 Regression] powerpc64le-linux abilist changes based on --with-long-double-format= 110309 gcc target unassigned Wrong code for masked load expansion 111922 gcc ipa unassigned [11/12/13/14 Regression] ICE in cp with -O2 -fno-tree-fre 112823 gcc other unassigned [11 only] -Wincompatible-pointer-types errors in libiberty/simple-object-mach-o.c (missing backport for gcc-11) 112891 gcc target unassigned [11/12/13/14 Regression] Missing vzeroupper insert 114049 gcc target unassigned gcc.dg/framework-1.c FAILs with Xcode 15.3 beta 3 114098 gcc target unassigned _tile_loadconfig doesn't work 114521 gcc target unassigned [11 only] aarch64: wrong code with Neon ld1/st1x4 intrinsics gcc-11 and earlier 115261 gcc rtl-opti unassigned [11/12/13/14/15 regression] FAIL: gcc.target/s390/vector/vec-abi-vararg-1.c 115269 gcc libstdc+ unassigned Hardcoded links in 14.1 docs to pages from release 4.3.2 115870 gcc tree-opt unassigned Inlining of different template instances wrongly produces infinite loop 103715 gcc fortran anlauf [11/12/13/14 Regression] ICE in gfc_find_gsymbol, at fortran/symbol.c:4301 since r9-3803-ga5fbc2f36a291cbe 111837 gcc fortran anlauf [11/12/13/14 Regression] Out of bounds access with optimization inside 
io-implied-do-control 114474 gcc fortran anlauf [11/12/13/14 Regression] DATA statement with derived type, pointer component rejected 113979 gcc ada ebotcazou [11/12/13/14 regression] bogus error on allocator for array type with Dynamic_Predicate 103506 gcc fortran jvdelisle [11 Regression] ICE in gfc_free_namespace, at fortran/symbol.c:4039 since r10-2798-ge68a35ae4a65d2b3 107397 gcc fortran jvdelisle [11/12 Regression] ICE in gfc_arith_plus, at fortran/arith.cc:654 91035 gcc target krebbel [11/12/13/14 Regression] gotools fails to build on s390x-linux-gnu 84006 gcc fortran pault [11/12 Regression] ICE in storage_size() with CLASS entity 89462 gcc fortran pault [11/12/13 Regression] gfortran loops in code generation 93678 gcc fortran pault [11/12/13 Regression] ICE with TRANSFER and typebound procedures 103312 gcc fortran pault [11/12 Regression] ICE in gfc_find_component since r9-1098-g3cf89a7b992d483e 103368 gcc fortran pault [11/12/13 Regression] ICE in gimplify_expr, at gimplify.c:15668 since r12-4464-g017665f63047ce47 103716 gcc fortran pault [11/12/13 Regression] ICE in gimplify_expr, at gimplify.c:15964 since r9-3803-ga5fbc2f36a291cbe 106999 gcc fortran pault [11/12/13 Regression] ICE tree check: expected record_type or union_type or qual_union_type, have function_type in gfc_class_data_get, at fortran/trans-expr.cc:233 71703 gcc fortran unassigned [11 Regression] [OOP] ICE in wide_int_to_tree, at tree.c:1488 89925 gcc fortran unassigned [11 Regression] Wrong array bounds from ALLOCATE with SOURCE or MOLD 99183 gcc fortran unassigned [11 Regression] Incompatible Runtime types 99757 gcc c++ unassigned [11/12/13/14 Regression] ICE: in cp_finish_decl, at cp/decl.c:7736 104391 gcc fortran unassigned [11 Regression] bind(C) and allocatable or pointer attribute don't work 104908 gcc fortran unassigned [11/12/13/14 Regression] incorrect Fortran out-of-bound runtime error. 
113179 gcc middle-e unassigned [11/12/13/14/15 Regression] MIPS: INS is used for long long, before SLL 80774 gcc fortran vehre [11/12/13/14/15 Regression][Coarray] ICE in gfc_conv_descriptor_data_get, at fortran/trans-array.c 82904 gcc fortran vehre [11/12/13/14/15 Regression][Coarray] ICE in make_ssa_name_fn, at tree-ssanames.c:261 111880 gcc fortran anlauf [11/12/13/14] False positive warning of obsolescent COMMON block with Fortran submodule 61527 gcc fortran unassigned [11/12/13/14 Regression] [OOP] class/extends, multiple generic assignment, accept invalid (From OE-Core rev: 7806e21e7d47fd010872a3967e51bcacdae3a76b) Signed-off-by: Deepthi Hemraj <Deepthi.Hemraj@windriver.com> Signed-off-by: Steve Sakoman <steve@sakoman.com>
Diffstat (limited to 'meta')
-rw-r--r-- meta/conf/distro/include/maintainers.inc 2
-rw-r--r-- meta/recipes-devtools/gcc/gcc-11.5.inc (renamed from meta/recipes-devtools/gcc/gcc-11.4.inc) 8
-rw-r--r-- meta/recipes-devtools/gcc/gcc-cross-canadian_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-cross-canadian_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc-cross_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-cross_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc-crosssdk_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-crosssdk_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc-runtime_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-runtime_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc-sanitizers_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-sanitizers_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc-source_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc-source_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/gcc/0001-aarch64-Update-Neoverse-N2-core-defini.patch 38
-rw-r--r-- meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch 2893
-rw-r--r-- meta/recipes-devtools/gcc/gcc_11.5.bb (renamed from meta/recipes-devtools/gcc/gcc_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/libgcc-initial_11.5.bb (renamed from meta/recipes-devtools/gcc/libgcc-initial_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/libgcc_11.5.bb (renamed from meta/recipes-devtools/gcc/libgcc_11.4.bb) 0
-rw-r--r-- meta/recipes-devtools/gcc/libgfortran_11.5.bb (renamed from meta/recipes-devtools/gcc/libgfortran_11.4.bb) 0
14 files changed, 4 insertions, 2937 deletions
diff --git a/meta/conf/distro/include/maintainers.inc b/meta/conf/distro/include/maintainers.inc
index bfc14951fe..7904a6ea03 100644
--- a/meta/conf/distro/include/maintainers.inc
+++ b/meta/conf/distro/include/maintainers.inc
@@ -189,7 +189,7 @@ RECIPE_MAINTAINER:pn-gcc-cross-canadian-${TRANSLATED_TARGET_ARCH} = "Khem Raj <r
189RECIPE_MAINTAINER:pn-gcc-crosssdk-${SDK_SYS} = "Khem Raj <raj.khem@gmail.com>" 189RECIPE_MAINTAINER:pn-gcc-crosssdk-${SDK_SYS} = "Khem Raj <raj.khem@gmail.com>"
190RECIPE_MAINTAINER:pn-gcc-runtime = "Khem Raj <raj.khem@gmail.com>" 190RECIPE_MAINTAINER:pn-gcc-runtime = "Khem Raj <raj.khem@gmail.com>"
191RECIPE_MAINTAINER:pn-gcc-sanitizers = "Khem Raj <raj.khem@gmail.com>" 191RECIPE_MAINTAINER:pn-gcc-sanitizers = "Khem Raj <raj.khem@gmail.com>"
192RECIPE_MAINTAINER:pn-gcc-source-11.4.0 = "Khem Raj <raj.khem@gmail.com>" 192RECIPE_MAINTAINER:pn-gcc-source-11.5.0 = "Khem Raj <raj.khem@gmail.com>"
193RECIPE_MAINTAINER:pn-gconf = "Ross Burton <ross.burton@arm.com>" 193RECIPE_MAINTAINER:pn-gconf = "Ross Burton <ross.burton@arm.com>"
194RECIPE_MAINTAINER:pn-gcr = "Alexander Kanavin <alex.kanavin@gmail.com>" 194RECIPE_MAINTAINER:pn-gcr = "Alexander Kanavin <alex.kanavin@gmail.com>"
195RECIPE_MAINTAINER:pn-gdb = "Khem Raj <raj.khem@gmail.com>" 195RECIPE_MAINTAINER:pn-gdb = "Khem Raj <raj.khem@gmail.com>"
diff --git a/meta/recipes-devtools/gcc/gcc-11.4.inc b/meta/recipes-devtools/gcc/gcc-11.5.inc
index fd6a3e92e3..c316d2a9a0 100644
--- a/meta/recipes-devtools/gcc/gcc-11.4.inc
+++ b/meta/recipes-devtools/gcc/gcc-11.5.inc
@@ -2,11 +2,11 @@ require gcc-common.inc
2 2
3# Third digit in PV should be incremented after a minor release 3# Third digit in PV should be incremented after a minor release
4 4
5PV = "11.4.0" 5PV = "11.5.0"
6 6
7# BINV should be incremented to a revision after a minor gcc release 7# BINV should be incremented to a revision after a minor gcc release
8 8
9BINV = "11.4.0" 9BINV = "11.5.0"
10 10
11FILESEXTRAPATHS =. "${FILE_DIRNAME}/gcc:${FILE_DIRNAME}/gcc/backport:" 11FILESEXTRAPATHS =. "${FILE_DIRNAME}/gcc:${FILE_DIRNAME}/gcc/backport:"
12 12
@@ -65,14 +65,12 @@ SRC_URI = "\
65 file://0003-CVE-2021-42574.patch \ 65 file://0003-CVE-2021-42574.patch \
66 file://0004-CVE-2021-42574.patch \ 66 file://0004-CVE-2021-42574.patch \
67 file://0001-CVE-2021-46195.patch \ 67 file://0001-CVE-2021-46195.patch \
68 file://0001-aarch64-Update-Neoverse-N2-core-defini.patch \
69 file://0002-aarch64-add-armv9-a-to-march.patch \ 68 file://0002-aarch64-add-armv9-a-to-march.patch \
70 file://0003-aarch64-Enable-FP16-feature-by-default-for-Armv9.patch \ 69 file://0003-aarch64-Enable-FP16-feature-by-default-for-Armv9.patch \
71 file://0004-arm-add-armv9-a-architecture-to-march.patch \ 70 file://0004-arm-add-armv9-a-architecture-to-march.patch \
72 file://CVE-2023-4039.patch \
73" 71"
74 72
75SRC_URI[sha256sum] = "3f2db222b007e8a4a23cd5ba56726ef08e8b1f1eb2055ee72c1402cea73a8dd9" 73SRC_URI[sha256sum] = "a6e21868ead545cf87f0c01f84276e4b5281d672098591c1c896241f09363478"
76 74
77S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}" 75S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}"
78 76
diff --git a/meta/recipes-devtools/gcc/gcc-cross-canadian_11.4.bb b/meta/recipes-devtools/gcc/gcc-cross-canadian_11.5.bb
index bf53c5cd78..bf53c5cd78 100644
--- a/meta/recipes-devtools/gcc/gcc-cross-canadian_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-cross-canadian_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc-cross_11.4.bb b/meta/recipes-devtools/gcc/gcc-cross_11.5.bb
index b43cca0c52..b43cca0c52 100644
--- a/meta/recipes-devtools/gcc/gcc-cross_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-cross_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc-crosssdk_11.4.bb b/meta/recipes-devtools/gcc/gcc-crosssdk_11.5.bb
index 40a6c4feff..40a6c4feff 100644
--- a/meta/recipes-devtools/gcc/gcc-crosssdk_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-crosssdk_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc-runtime_11.4.bb b/meta/recipes-devtools/gcc/gcc-runtime_11.5.bb
index dd430b57eb..dd430b57eb 100644
--- a/meta/recipes-devtools/gcc/gcc-runtime_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-runtime_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc-sanitizers_11.4.bb b/meta/recipes-devtools/gcc/gcc-sanitizers_11.5.bb
index 8bda2ccad6..8bda2ccad6 100644
--- a/meta/recipes-devtools/gcc/gcc-sanitizers_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-sanitizers_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc-source_11.4.bb b/meta/recipes-devtools/gcc/gcc-source_11.5.bb
index b890fa33ea..b890fa33ea 100644
--- a/meta/recipes-devtools/gcc/gcc-source_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc-source_11.5.bb
diff --git a/meta/recipes-devtools/gcc/gcc/0001-aarch64-Update-Neoverse-N2-core-defini.patch b/meta/recipes-devtools/gcc/gcc/0001-aarch64-Update-Neoverse-N2-core-defini.patch
deleted file mode 100644
index a0c9db72e1..0000000000
--- a/meta/recipes-devtools/gcc/gcc/0001-aarch64-Update-Neoverse-N2-core-defini.patch
+++ /dev/null
@@ -1,38 +0,0 @@
1From 9f37d31324f89d0b7b2abac988a976d121ae29c6 Mon Sep 17 00:00:00 2001
2From: Andre Vieira <andre.simoesdiasvieira@arm.com>
3Date: Thu, 8 Sep 2022 06:02:18 +0000
4Subject: [PATCH 1/4] aarch64: Update Neoverse N2 core definition
5
6commit 9f37d31324f89d0b7b2abac988a976d121ae29c6 from upstream.
7
8gcc/ChangeLog:
9
10 * config/aarch64/aarch64-cores.def: Update Neoverse N2 core entry.
11
12Upstream-Status: Backport
13Signed-off-by: Ruiqiang Hao <Ruiqiang.Hao@windriver.com>
14---
15 gcc/config/aarch64/aarch64-cores.def | 6 +++---
16 1 file changed, 3 insertions(+), 3 deletions(-)
17
18diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
19index 4643e0e27..3478e567a 100644
20--- a/gcc/config/aarch64/aarch64-cores.def
21+++ b/gcc/config/aarch64/aarch64-cores.def
22@@ -147,7 +147,6 @@
23 AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1)
24
25 /* Armv8.5-A Architecture Processors. */
26-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1)
27 AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoverse512tvb, 0x41, 0xd4f, -1)
28
29 /* ARMv8-A big.LITTLE implementations. */
30@@ -165,4 +164,7 @@
31 /* Armv8-R Architecture Processors. */
32 AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1)
33
34+/* Armv9-A Architecture Processors. */
35+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
36+
37 #undef AARCH64_CORE
38
diff --git a/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch b/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch
deleted file mode 100644
index 41684fe7dd..0000000000
--- a/meta/recipes-devtools/gcc/gcc/CVE-2023-4039.patch
+++ /dev/null
@@ -1,2893 +0,0 @@
1From: Richard Sandiford <richard.sandiford@arm.com>
2Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue
3Date: Tue, 12 Sep 2023 16:25:10 +0100
4
5This series of patches fixes deficiencies in GCC's -fstack-protector
6implementation for AArch64 when using dynamically allocated stack space.
7This is CVE-2023-4039. See:
8
9https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
10https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
11
12for more details.
13
14The fix is to put the saved registers above the locals area when
15-fstack-protector is used.
16
17The series also fixes a stack-clash problem that I found while working
18on the CVE. In unpatched sources, the stack-clash problem would only
19trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
20equivalent). But it would be a more significant issue with the new
21-fstack-protector frame layout. It's therefore important that both
22problems are fixed together.
23
24Some reorganisation of the code seemed necessary to fix the problems in a
25cleanish way. The series is therefore quite long, but only a handful of
26patches should have any effect on code generation.
27
28See the individual patches for a detailed description.
29
30Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
31I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.
32
33CVE: CVE-2023-4039
34Upstream-Status: Backport
35Signed-off-by: Ross Burton <ross.burton@arm.com>
36
37
38From 52816ab48f97968f3fbfb5656250f3de7c00166d Mon Sep 17 00:00:00 2001
39From: Richard Sandiford <richard.sandiford@arm.com>
40Date: Tue, 12 Sep 2023 16:19:43 +0100
41Subject: [PATCH 01/19] aarch64: Use local frame vars in shrink-wrapping code
42
43aarch64_layout_frame uses a shorthand for referring to
44cfun->machine->frame:
45
46 aarch64_frame &frame = cfun->machine->frame;
47
48This patch does the same for some other heavy users of the structure.
49No functional change intended.
50
51gcc/
52 * config/aarch64/aarch64.c (aarch64_save_callee_saves): Use
53 a local shorthand for cfun->machine->frame.
54 (aarch64_restore_callee_saves, aarch64_get_separate_components):
55 (aarch64_process_components): Likewise.
56 (aarch64_allocate_and_probe_stack_space): Likewise.
57 (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
58 (aarch64_layout_frame): Use existing shorthand for one more case.
59---
60 gcc/config/aarch64/aarch64.c | 115 ++++++++++++++++++-----------------
61 1 file changed, 60 insertions(+), 55 deletions(-)
62
63diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
64index 391a93f3018..77c1d1300a5 100644
65--- a/gcc/config/aarch64/aarch64.c
66+++ b/gcc/config/aarch64/aarch64.c
67@@ -7994,6 +7994,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
68 unsigned start, unsigned limit, bool skip_wb,
69 bool hard_fp_valid_p)
70 {
71+ aarch64_frame &frame = cfun->machine->frame;
72 rtx_insn *insn;
73 unsigned regno;
74 unsigned regno2;
75@@ -8008,8 +8009,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
76 bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
77
78 if (skip_wb
79- && (regno == cfun->machine->frame.wb_candidate1
80- || regno == cfun->machine->frame.wb_candidate2))
81+ && (regno == frame.wb_candidate1
82+ || regno == frame.wb_candidate2))
83 continue;
84
85 if (cfun->machine->reg_is_wrapped_separately[regno])
86@@ -8017,7 +8018,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
87
88 machine_mode mode = aarch64_reg_save_mode (regno);
89 reg = gen_rtx_REG (mode, regno);
90- offset = start_offset + cfun->machine->frame.reg_offset[regno];
91+ offset = start_offset + frame.reg_offset[regno];
92 rtx base_rtx = stack_pointer_rtx;
93 poly_int64 sp_offset = offset;
94
95@@ -8030,7 +8031,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
96 {
97 gcc_assert (known_eq (start_offset, 0));
98 poly_int64 fp_offset
99- = cfun->machine->frame.below_hard_fp_saved_regs_size;
100+ = frame.below_hard_fp_saved_regs_size;
101 if (hard_fp_valid_p)
102 base_rtx = hard_frame_pointer_rtx;
103 else
104@@ -8052,8 +8053,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
105 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
106 && !cfun->machine->reg_is_wrapped_separately[regno2]
107 && known_eq (GET_MODE_SIZE (mode),
108- cfun->machine->frame.reg_offset[regno2]
109- - cfun->machine->frame.reg_offset[regno]))
110+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
111 {
112 rtx reg2 = gen_rtx_REG (mode, regno2);
113 rtx mem2;
114@@ -8103,6 +8103,7 @@ static void
115 aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
116 unsigned limit, bool skip_wb, rtx *cfi_ops)
117 {
118+ aarch64_frame &frame = cfun->machine->frame;
119 unsigned regno;
120 unsigned regno2;
121 poly_int64 offset;
122@@ -8119,13 +8120,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
123 rtx reg, mem;
124
125 if (skip_wb
126- && (regno == cfun->machine->frame.wb_candidate1
127- || regno == cfun->machine->frame.wb_candidate2))
128+ && (regno == frame.wb_candidate1
129+ || regno == frame.wb_candidate2))
130 continue;
131
132 machine_mode mode = aarch64_reg_save_mode (regno);
133 reg = gen_rtx_REG (mode, regno);
134- offset = start_offset + cfun->machine->frame.reg_offset[regno];
135+ offset = start_offset + frame.reg_offset[regno];
136 rtx base_rtx = stack_pointer_rtx;
137 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
138 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
139@@ -8136,8 +8137,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
140 && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
141 && !cfun->machine->reg_is_wrapped_separately[regno2]
142 && known_eq (GET_MODE_SIZE (mode),
143- cfun->machine->frame.reg_offset[regno2]
144- - cfun->machine->frame.reg_offset[regno]))
145+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
146 {
147 rtx reg2 = gen_rtx_REG (mode, regno2);
148 rtx mem2;
149@@ -8242,6 +8242,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
150 static sbitmap
151 aarch64_get_separate_components (void)
152 {
153+ aarch64_frame &frame = cfun->machine->frame;
154 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
155 bitmap_clear (components);
156
157@@ -8258,18 +8259,18 @@ aarch64_get_separate_components (void)
158 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
159 continue;
160
161- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
162+ poly_int64 offset = frame.reg_offset[regno];
163
164 /* If the register is saved in the first SVE save slot, we use
165 it as a stack probe for -fstack-clash-protection. */
166 if (flag_stack_clash_protection
167- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
168+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
169 && known_eq (offset, 0))
170 continue;
171
172 /* Get the offset relative to the register we'll use. */
173 if (frame_pointer_needed)
174- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
175+ offset -= frame.below_hard_fp_saved_regs_size;
176 else
177 offset += crtl->outgoing_args_size;
178
179@@ -8288,11 +8289,11 @@ aarch64_get_separate_components (void)
180 /* If the spare predicate register used by big-endian SVE code
181 is call-preserved, it must be saved in the main prologue
182 before any saves that use it. */
183- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
184- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
185+ if (frame.spare_pred_reg != INVALID_REGNUM)
186+ bitmap_clear_bit (components, frame.spare_pred_reg);
187
188- unsigned reg1 = cfun->machine->frame.wb_candidate1;
189- unsigned reg2 = cfun->machine->frame.wb_candidate2;
190+ unsigned reg1 = frame.wb_candidate1;
191+ unsigned reg2 = frame.wb_candidate2;
192 /* If registers have been chosen to be stored/restored with
193 writeback don't interfere with them to avoid having to output explicit
194 stack adjustment instructions. */
195@@ -8401,6 +8402,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
196 static void
197 aarch64_process_components (sbitmap components, bool prologue_p)
198 {
199+ aarch64_frame &frame = cfun->machine->frame;
200 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
201 ? HARD_FRAME_POINTER_REGNUM
202 : STACK_POINTER_REGNUM);
203@@ -8415,9 +8417,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
204 machine_mode mode = aarch64_reg_save_mode (regno);
205
206 rtx reg = gen_rtx_REG (mode, regno);
207- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
208+ poly_int64 offset = frame.reg_offset[regno];
209 if (frame_pointer_needed)
210- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
211+ offset -= frame.below_hard_fp_saved_regs_size;
212 else
213 offset += crtl->outgoing_args_size;
214
215@@ -8442,14 +8444,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
216 break;
217 }
218
219- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
220+ poly_int64 offset2 = frame.reg_offset[regno2];
221 /* The next register is not of the same class or its offset is not
222 mergeable with the current one into a pair. */
223 if (aarch64_sve_mode_p (mode)
224 || !satisfies_constraint_Ump (mem)
225 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
226 || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
227- || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
228+ || maybe_ne ((offset2 - frame.reg_offset[regno]),
229 GET_MODE_SIZE (mode)))
230 {
231 insn = emit_insn (set);
232@@ -8471,7 +8473,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
233 /* REGNO2 can be saved/restored in a pair with REGNO. */
234 rtx reg2 = gen_rtx_REG (mode, regno2);
235 if (frame_pointer_needed)
236- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
237+ offset2 -= frame.below_hard_fp_saved_regs_size;
238 else
239 offset2 += crtl->outgoing_args_size;
240 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
241@@ -8566,6 +8568,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
242 bool frame_related_p,
243 bool final_adjustment_p)
244 {
245+ aarch64_frame &frame = cfun->machine->frame;
246 HOST_WIDE_INT guard_size
247 = 1 << param_stack_clash_protection_guard_size;
248 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
249@@ -8586,25 +8589,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
250 register as a probe. We can't assume that LR was saved at position 0
251 though, so treat any space below it as unprobed. */
252 if (final_adjustment_p
253- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
254+ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
255 {
256- poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
257+ poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
258 if (known_ge (lr_offset, 0))
259 min_probe_threshold -= lr_offset.to_constant ();
260 else
261 gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
262 }
263
264- poly_int64 frame_size = cfun->machine->frame.frame_size;
265+ poly_int64 frame_size = frame.frame_size;
266
267 /* We should always have a positive probe threshold. */
268 gcc_assert (min_probe_threshold > 0);
269
270 if (flag_stack_clash_protection && !final_adjustment_p)
271 {
272- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
273- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
274- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
275+ poly_int64 initial_adjust = frame.initial_adjust;
276+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
277+ poly_int64 final_adjust = frame.final_adjust;
278
279 if (known_eq (frame_size, 0))
280 {
281@@ -8893,17 +8896,18 @@ aarch64_epilogue_uses (int regno)
282 void
283 aarch64_expand_prologue (void)
284 {
285- poly_int64 frame_size = cfun->machine->frame.frame_size;
286- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
287- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
288- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
289- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
290- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
291+ aarch64_frame &frame = cfun->machine->frame;
292+ poly_int64 frame_size = frame.frame_size;
293+ poly_int64 initial_adjust = frame.initial_adjust;
294+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
295+ poly_int64 final_adjust = frame.final_adjust;
296+ poly_int64 callee_offset = frame.callee_offset;
297+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
298 poly_int64 below_hard_fp_saved_regs_size
299- = cfun->machine->frame.below_hard_fp_saved_regs_size;
300- unsigned reg1 = cfun->machine->frame.wb_candidate1;
301- unsigned reg2 = cfun->machine->frame.wb_candidate2;
302- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
303+ = frame.below_hard_fp_saved_regs_size;
304+ unsigned reg1 = frame.wb_candidate1;
305+ unsigned reg2 = frame.wb_candidate2;
306+ bool emit_frame_chain = frame.emit_frame_chain;
307 rtx_insn *insn;
308
309 if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
310@@ -8969,7 +8973,7 @@ aarch64_expand_prologue (void)
311
312 /* The offset of the frame chain record (if any) from the current SP. */
313 poly_int64 chain_offset = (initial_adjust + callee_adjust
314- - cfun->machine->frame.hard_fp_offset);
315+ - frame.hard_fp_offset);
316 gcc_assert (known_ge (chain_offset, 0));
317
318 /* The offset of the bottom of the save area from the current SP. */
319@@ -9072,15 +9076,16 @@ aarch64_use_return_insn_p (void)
320 void
321 aarch64_expand_epilogue (bool for_sibcall)
322 {
323- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
324- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
325- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
326- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
327- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
328+ aarch64_frame &frame = cfun->machine->frame;
329+ poly_int64 initial_adjust = frame.initial_adjust;
330+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
331+ poly_int64 final_adjust = frame.final_adjust;
332+ poly_int64 callee_offset = frame.callee_offset;
333+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
334 poly_int64 below_hard_fp_saved_regs_size
335- = cfun->machine->frame.below_hard_fp_saved_regs_size;
336- unsigned reg1 = cfun->machine->frame.wb_candidate1;
337- unsigned reg2 = cfun->machine->frame.wb_candidate2;
338+ = frame.below_hard_fp_saved_regs_size;
339+ unsigned reg1 = frame.wb_candidate1;
340+ unsigned reg2 = frame.wb_candidate2;
341 rtx cfi_ops = NULL;
342 rtx_insn *insn;
343 /* A stack clash protection prologue may not have left EP0_REGNUM or
344@@ -9113,7 +9118,7 @@ aarch64_expand_epilogue (bool for_sibcall)
345 /* We need to add memory barrier to prevent read from deallocated stack. */
346 bool need_barrier_p
347 = maybe_ne (get_frame_size ()
348- + cfun->machine->frame.saved_varargs_size, 0);
349+ + frame.saved_varargs_size, 0);
350
351 /* Emit a barrier to prevent loads from a deallocated stack. */
352 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
353@@ -11744,24 +11749,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
354 poly_int64
355 aarch64_initial_elimination_offset (unsigned from, unsigned to)
356 {
357+ aarch64_frame &frame = cfun->machine->frame;
358+
359 if (to == HARD_FRAME_POINTER_REGNUM)
360 {
361 if (from == ARG_POINTER_REGNUM)
362- return cfun->machine->frame.hard_fp_offset;
363+ return frame.hard_fp_offset;
364
365 if (from == FRAME_POINTER_REGNUM)
366- return cfun->machine->frame.hard_fp_offset
367- - cfun->machine->frame.locals_offset;
368+ return frame.hard_fp_offset - frame.locals_offset;
369 }
370
371 if (to == STACK_POINTER_REGNUM)
372 {
373 if (from == FRAME_POINTER_REGNUM)
374- return cfun->machine->frame.frame_size
375- - cfun->machine->frame.locals_offset;
376+ return frame.frame_size - frame.locals_offset;
377 }
378
379- return cfun->machine->frame.frame_size;
380+ return frame.frame_size;
381 }
382
383
384--
3852.34.1
386
387
388From a2a57f7ec7912e77eb26919545807d90065584ff Mon Sep 17 00:00:00 2001
389From: Richard Sandiford <richard.sandiford@arm.com>
390Date: Tue, 12 Sep 2023 16:19:44 +0100
391Subject: [PATCH 02/19] aarch64: Avoid a use of callee_offset
392
393When we emit the frame chain, i.e. when we reach Here in this statement
394of aarch64_expand_prologue:
395
396 if (emit_frame_chain)
397 {
398 // Here
399 ...
400 }
401
402the stack is in one of two states:
403
404- We've allocated up to the frame chain, but no more.
405
406- We've allocated the whole frame, and the frame chain is within easy
407 reach of the new SP.
408
409The offset of the frame chain from the current SP is available
410in aarch64_frame as callee_offset. It is also available as the
411chain_offset local variable, where the latter is calculated from other
412data. (However, chain_offset is not always equal to callee_offset when
413!emit_frame_chain, so chain_offset isn't redundant.)
414
415In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
416chain_offset for the initialisation of the hard frame pointer:
417
418 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
419- stack_pointer_rtx, callee_offset,
420+ stack_pointer_rtx, chain_offset,
421 tmp1_rtx, tmp0_rtx, frame_pointer_needed);
422
423But the later REG_CFA_ADJUST_CFA handling still used callee_offset.
424
425I think the difference is harmless, but it's more logical for the
426CFA note to be in sync, and it's more convenient for later patches
427if it uses chain_offset.
428
429gcc/
430 * config/aarch64/aarch64.c (aarch64_expand_prologue): Use
431 chain_offset rather than callee_offset.
432---
433 gcc/config/aarch64/aarch64.c | 4 +---
434 1 file changed, 1 insertion(+), 3 deletions(-)
435
436diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
437index 77c1d1300a5..6bc026bd08f 100644
438--- a/gcc/config/aarch64/aarch64.c
439+++ b/gcc/config/aarch64/aarch64.c
440@@ -8901,7 +8901,6 @@ aarch64_expand_prologue (void)
441 poly_int64 initial_adjust = frame.initial_adjust;
442 HOST_WIDE_INT callee_adjust = frame.callee_adjust;
443 poly_int64 final_adjust = frame.final_adjust;
444- poly_int64 callee_offset = frame.callee_offset;
445 poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
446 poly_int64 below_hard_fp_saved_regs_size
447 = frame.below_hard_fp_saved_regs_size;
448@@ -9010,8 +9009,7 @@ aarch64_expand_prologue (void)
449 implicit. */
450 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
451 {
452- rtx src = plus_constant (Pmode, stack_pointer_rtx,
453- callee_offset);
454+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
455 add_reg_note (insn, REG_CFA_ADJUST_CFA,
456 gen_rtx_SET (hard_frame_pointer_rtx, src));
457 }
458--
4592.34.1
460
461
462From 5efdcc8ed19d9d9e708a001f5dc695560411496d Mon Sep 17 00:00:00 2001
463From: Richard Sandiford <richard.sandiford@arm.com>
464Date: Tue, 12 Sep 2023 16:19:44 +0100
465Subject: [PATCH 03/19] aarch64: Explicitly handle frames with no saved
466 registers
467
468If a frame has no saved registers, it can be allocated in one go.
469There is no need to treat the areas below and above the saved
470registers as separate.
471
472And if we allocate the frame in one go, it should be allocated
473as the initial_adjust rather than the final_adjust. This allows the
474frame size to grow to guard_size - guard_used_by_caller before a stack
475probe is needed. (A frame with no register saves is necessarily a
476leaf frame.)
477
478This is a no-op as things stand, since a leaf function will have
479no outgoing arguments, and so all the frame will be above where
480the saved registers normally go.
481
482gcc/
483 * config/aarch64/aarch64.c (aarch64_layout_frame): Explicitly
484 allocate the frame in one go if there are no saved registers.
485---
486 gcc/config/aarch64/aarch64.c | 8 +++++---
487 1 file changed, 5 insertions(+), 3 deletions(-)
488
489diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
490index 6bc026bd08f..05e6ae8c0c9 100644
491--- a/gcc/config/aarch64/aarch64.c
492+++ b/gcc/config/aarch64/aarch64.c
493@@ -7609,9 +7609,11 @@ aarch64_layout_frame (void)
494
495 HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
496 HOST_WIDE_INT const_saved_regs_size;
497- if (frame.frame_size.is_constant (&const_size)
498- && const_size < max_push_offset
499- && known_eq (frame.hard_fp_offset, const_size))
500+ if (known_eq (frame.saved_regs_size, 0))
501+ frame.initial_adjust = frame.frame_size;
502+ else if (frame.frame_size.is_constant (&const_size)
503+ && const_size < max_push_offset
504+ && known_eq (frame.hard_fp_offset, const_size))
505 {
506 /* Simple, small frame with no outgoing arguments:
507
508--
5092.34.1
510
511
512From a8385d14318634f2e3a08a75bd2d6e2810f8cec9 Mon Sep 17 00:00:00 2001
513From: Richard Sandiford <richard.sandiford@arm.com>
514Date: Tue, 12 Sep 2023 16:19:45 +0100
515Subject: [PATCH 04/19] aarch64: Add bytes_below_saved_regs to frame info
516
517The frame layout code currently hard-codes the assumption that
518the number of bytes below the saved registers is equal to the
519size of the outgoing arguments. This patch abstracts that
520value into a new field of aarch64_frame.
521
522gcc/
523 * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
524 field.
525 * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it,
526 and use it instead of crtl->outgoing_args_size.
527 (aarch64_get_separate_components): Use bytes_below_saved_regs instead
528 of outgoing_args_size.
529 (aarch64_process_components): Likewise.
530---
531 gcc/config/aarch64/aarch64.c | 71 ++++++++++++++++++------------------
532 gcc/config/aarch64/aarch64.h | 5 +++
533 2 files changed, 41 insertions(+), 35 deletions(-)
534
535diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
536index 05e6ae8c0c9..8fa5a0b2545 100644
537--- a/gcc/config/aarch64/aarch64.c
538+++ b/gcc/config/aarch64/aarch64.c
539@@ -7476,6 +7476,8 @@ aarch64_layout_frame (void)
540 gcc_assert (crtl->is_leaf
541 || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
542
543+ frame.bytes_below_saved_regs = crtl->outgoing_args_size;
544+
545 /* Now assign stack slots for the registers. Start with the predicate
546 registers, since predicate LDR and STR have a relatively small
547 offset range. These saves happen below the hard frame pointer. */
548@@ -7580,18 +7582,18 @@ aarch64_layout_frame (void)
549
550 poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
551
552- poly_int64 above_outgoing_args
553+ poly_int64 saved_regs_and_above
554 = aligned_upper_bound (varargs_and_saved_regs_size
555 + get_frame_size (),
556 STACK_BOUNDARY / BITS_PER_UNIT);
557
558 frame.hard_fp_offset
559- = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
560+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
561
562 /* Both these values are already aligned. */
563- gcc_assert (multiple_p (crtl->outgoing_args_size,
564+ gcc_assert (multiple_p (frame.bytes_below_saved_regs,
565 STACK_BOUNDARY / BITS_PER_UNIT));
566- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
567+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
568
569 frame.locals_offset = frame.saved_varargs_size;
570
571@@ -7607,7 +7609,7 @@ aarch64_layout_frame (void)
572 else if (frame.wb_candidate1 != INVALID_REGNUM)
573 max_push_offset = 256;
574
575- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
576+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
577 HOST_WIDE_INT const_saved_regs_size;
578 if (known_eq (frame.saved_regs_size, 0))
579 frame.initial_adjust = frame.frame_size;
580@@ -7615,31 +7617,31 @@ aarch64_layout_frame (void)
581 && const_size < max_push_offset
582 && known_eq (frame.hard_fp_offset, const_size))
583 {
584- /* Simple, small frame with no outgoing arguments:
585+ /* Simple, small frame with no data below the saved registers.
586
587 stp reg1, reg2, [sp, -frame_size]!
588 stp reg3, reg4, [sp, 16] */
589 frame.callee_adjust = const_size;
590 }
591- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
592+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
593 && frame.saved_regs_size.is_constant (&const_saved_regs_size)
594- && const_outgoing_args_size + const_saved_regs_size < 512
595- /* We could handle this case even with outgoing args, provided
596- that the number of args left us with valid offsets for all
597- predicate and vector save slots. It's such a rare case that
598- it hardly seems worth the effort though. */
599- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
600+ && const_below_saved_regs + const_saved_regs_size < 512
601+ /* We could handle this case even with data below the saved
602+ registers, provided that that data left us with valid offsets
603+ for all predicate and vector save slots. It's such a rare
604+ case that it hardly seems worth the effort though. */
605+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
606 && !(cfun->calls_alloca
607 && frame.hard_fp_offset.is_constant (&const_fp_offset)
608 && const_fp_offset < max_push_offset))
609 {
610- /* Frame with small outgoing arguments:
611+ /* Frame with small area below the saved registers:
612
613 sub sp, sp, frame_size
614- stp reg1, reg2, [sp, outgoing_args_size]
615- stp reg3, reg4, [sp, outgoing_args_size + 16] */
616+ stp reg1, reg2, [sp, bytes_below_saved_regs]
617+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
618 frame.initial_adjust = frame.frame_size;
619- frame.callee_offset = const_outgoing_args_size;
620+ frame.callee_offset = const_below_saved_regs;
621 }
622 else if (saves_below_hard_fp_p
623 && known_eq (frame.saved_regs_size,
624@@ -7649,30 +7651,29 @@ aarch64_layout_frame (void)
625
626 sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
627 save SVE registers relative to SP
628- sub sp, sp, outgoing_args_size */
629+ sub sp, sp, bytes_below_saved_regs */
630 frame.initial_adjust = (frame.hard_fp_offset
631 + frame.below_hard_fp_saved_regs_size);
632- frame.final_adjust = crtl->outgoing_args_size;
633+ frame.final_adjust = frame.bytes_below_saved_regs;
634 }
635 else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
636 && const_fp_offset < max_push_offset)
637 {
638- /* Frame with large outgoing arguments or SVE saves, but with
639- a small local area:
640+ /* Frame with large area below the saved registers, or with SVE saves,
641+ but with a small area above:
642
643 stp reg1, reg2, [sp, -hard_fp_offset]!
644 stp reg3, reg4, [sp, 16]
645 [sub sp, sp, below_hard_fp_saved_regs_size]
646 [save SVE registers relative to SP]
647- sub sp, sp, outgoing_args_size */
648+ sub sp, sp, bytes_below_saved_regs */
649 frame.callee_adjust = const_fp_offset;
650 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
651- frame.final_adjust = crtl->outgoing_args_size;
652+ frame.final_adjust = frame.bytes_below_saved_regs;
653 }
654 else
655 {
656- /* Frame with large local area and outgoing arguments or SVE saves,
657- using frame pointer:
658+ /* General case:
659
660 sub sp, sp, hard_fp_offset
661 stp x29, x30, [sp, 0]
662@@ -7680,10 +7681,10 @@ aarch64_layout_frame (void)
663 stp reg3, reg4, [sp, 16]
664 [sub sp, sp, below_hard_fp_saved_regs_size]
665 [save SVE registers relative to SP]
666- sub sp, sp, outgoing_args_size */
667+ sub sp, sp, bytes_below_saved_regs */
668 frame.initial_adjust = frame.hard_fp_offset;
669 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
670- frame.final_adjust = crtl->outgoing_args_size;
671+ frame.final_adjust = frame.bytes_below_saved_regs;
672 }
673
674 /* Make sure the individual adjustments add up to the full frame size. */
675@@ -8274,7 +8275,7 @@ aarch64_get_separate_components (void)
676 if (frame_pointer_needed)
677 offset -= frame.below_hard_fp_saved_regs_size;
678 else
679- offset += crtl->outgoing_args_size;
680+ offset += frame.bytes_below_saved_regs;
681
682 /* Check that we can access the stack slot of the register with one
683 direct load with no adjustments needed. */
684@@ -8423,7 +8424,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
685 if (frame_pointer_needed)
686 offset -= frame.below_hard_fp_saved_regs_size;
687 else
688- offset += crtl->outgoing_args_size;
689+ offset += frame.bytes_below_saved_regs;
690
691 rtx addr = plus_constant (Pmode, ptr_reg, offset);
692 rtx mem = gen_frame_mem (mode, addr);
693@@ -8477,7 +8478,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
694 if (frame_pointer_needed)
695 offset2 -= frame.below_hard_fp_saved_regs_size;
696 else
697- offset2 += crtl->outgoing_args_size;
698+ offset2 += frame.bytes_below_saved_regs;
699 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
700 rtx mem2 = gen_frame_mem (mode, addr2);
701 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
702@@ -8551,10 +8552,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
703 registers. If POLY_SIZE is not large enough to require a probe this function
704 will only adjust the stack. When allocating the stack space
705 FRAME_RELATED_P is then used to indicate if the allocation is frame related.
706- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
707- arguments. If we are then we ensure that any allocation larger than the ABI
708- defined buffer needs a probe so that the invariant of having a 1KB buffer is
709- maintained.
710+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
711+ the saved registers. If we are then we ensure that any allocation
712+ larger than the ABI defined buffer needs a probe so that the
713+ invariant of having a 1KB buffer is maintained.
714
715 We emit barriers after each stack adjustment to prevent optimizations from
716 breaking the invariant that we never drop the stack more than a page. This
717@@ -8763,7 +8764,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
718 /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
719 be probed. This maintains the requirement that each page is probed at
720 least once. For initial probing we probe only if the allocation is
721- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
722+ more than GUARD_SIZE - buffer, and below the saved registers we probe
723 if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
724 GUARD_SIZE. This works that for any allocation that is large enough to
725 trigger a probe here, we'll have at least one, and if they're not large
726diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
727index bb383acfae8..6f0b8c7107e 100644
728--- a/gcc/config/aarch64/aarch64.h
729+++ b/gcc/config/aarch64/aarch64.h
730@@ -837,6 +837,11 @@ struct GTY (()) aarch64_frame
731 /* The size of the callee-save registers with a slot in REG_OFFSET. */
732 poly_int64 saved_regs_size;
733
734+ /* The number of bytes between the bottom of the static frame (the bottom
735+ of the outgoing arguments) and the bottom of the register save area.
736+ This value is always a multiple of STACK_BOUNDARY. */
737+ poly_int64 bytes_below_saved_regs;
738+
739 /* The size of the callee-save registers with a slot in REG_OFFSET that
740 are saved below the hard frame pointer. */
741 poly_int64 below_hard_fp_saved_regs_size;
742--
7432.34.1
744
745
746From d3f6ceecc8a7f128a9e6cb7d8aecf0de81ed9705 Mon Sep 17 00:00:00 2001
747From: Richard Sandiford <richard.sandiford@arm.com>
748Date: Tue, 12 Sep 2023 16:19:45 +0100
749Subject: [PATCH 05/19] aarch64: Add bytes_below_hard_fp to frame info
750
751Following on from the previous bytes_below_saved_regs patch, this one
752records the number of bytes that are below the hard frame pointer.
753This eventually replaces below_hard_fp_saved_regs_size.
754
755If a frame pointer is not needed, the epilogue adds final_adjust
756to the stack pointer before restoring registers:
757
758 aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
759
760Therefore, if the epilogue needs to restore the stack pointer from
761the hard frame pointer, the directly corresponding offset is:
762
763 -bytes_below_hard_fp + final_adjust
764
765i.e. go from the hard frame pointer to the bottom of the frame,
766then add the same amount as if we were using the stack pointer
767from the outset.
768
769gcc/
770 * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
771 field.
772 * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it.
773 (aarch64_expand_epilogue): Use it instead of
774 below_hard_fp_saved_regs_size.
775---
776 gcc/config/aarch64/aarch64.c | 6 +++---
777 gcc/config/aarch64/aarch64.h | 5 +++++
778 2 files changed, 8 insertions(+), 3 deletions(-)
779
780diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
781index 8fa5a0b2545..e03adf57226 100644
782--- a/gcc/config/aarch64/aarch64.c
783+++ b/gcc/config/aarch64/aarch64.c
784@@ -7528,6 +7528,7 @@ aarch64_layout_frame (void)
785 of the callee save area. */
786 bool saves_below_hard_fp_p = maybe_ne (offset, 0);
787 frame.below_hard_fp_saved_regs_size = offset;
788+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
789 if (frame.emit_frame_chain)
790 {
791 /* FP and LR are placed in the linkage record. */
792@@ -9083,8 +9084,7 @@ aarch64_expand_epilogue (bool for_sibcall)
793 poly_int64 final_adjust = frame.final_adjust;
794 poly_int64 callee_offset = frame.callee_offset;
795 poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
796- poly_int64 below_hard_fp_saved_regs_size
797- = frame.below_hard_fp_saved_regs_size;
798+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
799 unsigned reg1 = frame.wb_candidate1;
800 unsigned reg2 = frame.wb_candidate2;
801 rtx cfi_ops = NULL;
802@@ -9140,7 +9140,7 @@ aarch64_expand_epilogue (bool for_sibcall)
803 is restored on the instruction doing the writeback. */
804 aarch64_add_offset (Pmode, stack_pointer_rtx,
805 hard_frame_pointer_rtx,
806- -callee_offset - below_hard_fp_saved_regs_size,
807+ -bytes_below_hard_fp + final_adjust,
808 tmp1_rtx, tmp0_rtx, callee_adjust == 0);
809 else
810 /* The case where we need to re-use the register here is very rare, so
811diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
812index 6f0b8c7107e..21ac920a3fe 100644
813--- a/gcc/config/aarch64/aarch64.h
814+++ b/gcc/config/aarch64/aarch64.h
815@@ -846,6 +846,11 @@ struct GTY (()) aarch64_frame
816 are saved below the hard frame pointer. */
817 poly_int64 below_hard_fp_saved_regs_size;
818
819+ /* The number of bytes between the bottom of the static frame (the bottom
820+ of the outgoing arguments) and the hard frame pointer. This value is
821+ always a multiple of STACK_BOUNDARY. */
822+ poly_int64 bytes_below_hard_fp;
823+
824 /* Offset from the base of the frame (incomming SP) to the
825 top of the locals area. This value is always a multiple of
826 STACK_BOUNDARY. */
827--
8282.34.1
829
830
831From e8a7ec87fcdbaa5f7c7bd499aebe5cefacbf8687 Mon Sep 17 00:00:00 2001
832From: Richard Sandiford <richard.sandiford@arm.com>
833Date: Tue, 12 Sep 2023 16:19:46 +0100
834Subject: [PATCH 06/19] aarch64: Tweak aarch64_save/restore_callee_saves
835
836aarch64_save_callee_saves and aarch64_restore_callee_saves took
837a parameter called start_offset that gives the offset of the
838bottom of the saved register area from the current stack pointer.
839However, it's more convenient for later patches if we use the
840bottom of the entire frame as the reference point, rather than
841the bottom of the saved registers.
842
843Doing that removes the need for the callee_offset field.
844Other than that, this is not a win on its own. It only really
845makes sense in combination with the follow-on patches.
846
847gcc/
848 * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete.
849 * config/aarch64/aarch64.c (aarch64_layout_frame): Remove
850 callee_offset handling.
851 (aarch64_save_callee_saves): Replace the start_offset parameter
852 with a bytes_below_sp parameter.
853 (aarch64_restore_callee_saves): Likewise.
854 (aarch64_expand_prologue): Update accordingly.
855 (aarch64_expand_epilogue): Likewise.
856---
857 gcc/config/aarch64/aarch64.c | 56 ++++++++++++++++++------------------
858 gcc/config/aarch64/aarch64.h | 4 ---
859 2 files changed, 28 insertions(+), 32 deletions(-)
860
861diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
862index e03adf57226..96e99f6c17a 100644
863--- a/gcc/config/aarch64/aarch64.c
864+++ b/gcc/config/aarch64/aarch64.c
865@@ -7602,7 +7602,6 @@ aarch64_layout_frame (void)
866 frame.final_adjust = 0;
867 frame.callee_adjust = 0;
868 frame.sve_callee_adjust = 0;
869- frame.callee_offset = 0;
870
871 HOST_WIDE_INT max_push_offset = 0;
872 if (frame.wb_candidate2 != INVALID_REGNUM)
873@@ -7642,7 +7641,6 @@ aarch64_layout_frame (void)
874 stp reg1, reg2, [sp, bytes_below_saved_regs]
875 stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
876 frame.initial_adjust = frame.frame_size;
877- frame.callee_offset = const_below_saved_regs;
878 }
879 else if (saves_below_hard_fp_p
880 && known_eq (frame.saved_regs_size,
881@@ -7989,12 +7987,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
882 }
883
884 /* Emit code to save the callee-saved registers from register number START
885- to LIMIT to the stack at the location starting at offset START_OFFSET,
886- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
887- is true if the hard frame pointer has been set up. */
888+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
889+ bytes above the bottom of the static frame. Skip any write-back
890+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
891+ frame pointer has been set up. */
892
893 static void
894-aarch64_save_callee_saves (poly_int64 start_offset,
895+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
896 unsigned start, unsigned limit, bool skip_wb,
897 bool hard_fp_valid_p)
898 {
899@@ -8022,7 +8021,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
900
901 machine_mode mode = aarch64_reg_save_mode (regno);
902 reg = gen_rtx_REG (mode, regno);
903- offset = start_offset + frame.reg_offset[regno];
904+ offset = (frame.reg_offset[regno]
905+ + frame.bytes_below_saved_regs
906+ - bytes_below_sp);
907 rtx base_rtx = stack_pointer_rtx;
908 poly_int64 sp_offset = offset;
909
910@@ -8033,9 +8034,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
911 else if (GP_REGNUM_P (regno)
912 && (!offset.is_constant (&const_offset) || const_offset >= 512))
913 {
914- gcc_assert (known_eq (start_offset, 0));
915- poly_int64 fp_offset
916- = frame.below_hard_fp_saved_regs_size;
917+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
918 if (hard_fp_valid_p)
919 base_rtx = hard_frame_pointer_rtx;
920 else
921@@ -8099,12 +8098,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
922 }
923
924 /* Emit code to restore the callee registers from register number START
925- up to and including LIMIT. Restore from the stack offset START_OFFSET,
926- skipping any write-back candidates if SKIP_WB is true. Write the
927- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
928+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
929+ bytes above the bottom of the static frame. Skip any write-back
930+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
931+ notes into CFI_OPS. */
932
933 static void
934-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
935+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
936 unsigned limit, bool skip_wb, rtx *cfi_ops)
937 {
938 aarch64_frame &frame = cfun->machine->frame;
939@@ -8130,7 +8130,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
940
941 machine_mode mode = aarch64_reg_save_mode (regno);
942 reg = gen_rtx_REG (mode, regno);
943- offset = start_offset + frame.reg_offset[regno];
944+ offset = (frame.reg_offset[regno]
945+ + frame.bytes_below_saved_regs
946+ - bytes_below_sp);
947 rtx base_rtx = stack_pointer_rtx;
948 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
949 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
950@@ -8906,8 +8908,6 @@ aarch64_expand_prologue (void)
951 HOST_WIDE_INT callee_adjust = frame.callee_adjust;
952 poly_int64 final_adjust = frame.final_adjust;
953 poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
954- poly_int64 below_hard_fp_saved_regs_size
955- = frame.below_hard_fp_saved_regs_size;
956 unsigned reg1 = frame.wb_candidate1;
957 unsigned reg2 = frame.wb_candidate2;
958 bool emit_frame_chain = frame.emit_frame_chain;
959@@ -8979,8 +8979,8 @@ aarch64_expand_prologue (void)
960 - frame.hard_fp_offset);
961 gcc_assert (known_ge (chain_offset, 0));
962
963- /* The offset of the bottom of the save area from the current SP. */
964- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
965+ /* The offset of the current SP from the bottom of the static frame. */
966+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
967
968 if (emit_frame_chain)
969 {
970@@ -8988,7 +8988,7 @@ aarch64_expand_prologue (void)
971 {
972 reg1 = R29_REGNUM;
973 reg2 = R30_REGNUM;
974- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
975+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
976 false, false);
977 }
978 else
979@@ -9028,7 +9028,7 @@ aarch64_expand_prologue (void)
980 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
981 }
982
983- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
984+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
985 callee_adjust != 0 || emit_frame_chain,
986 emit_frame_chain);
987 if (maybe_ne (sve_callee_adjust, 0))
988@@ -9038,16 +9038,17 @@ aarch64_expand_prologue (void)
989 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
990 sve_callee_adjust,
991 !frame_pointer_needed, false);
992- saved_regs_offset += sve_callee_adjust;
993+ bytes_below_sp -= sve_callee_adjust;
994 }
995- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
996+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
997 false, emit_frame_chain);
998- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
999+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
1000 callee_adjust != 0 || emit_frame_chain,
1001 emit_frame_chain);
1002
1003 /* We may need to probe the final adjustment if it is larger than the guard
1004 that is assumed by the called. */
1005+ gcc_assert (known_eq (bytes_below_sp, final_adjust));
1006 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
1007 !frame_pointer_needed, true);
1008 }
1009@@ -9082,7 +9083,6 @@ aarch64_expand_epilogue (bool for_sibcall)
1010 poly_int64 initial_adjust = frame.initial_adjust;
1011 HOST_WIDE_INT callee_adjust = frame.callee_adjust;
1012 poly_int64 final_adjust = frame.final_adjust;
1013- poly_int64 callee_offset = frame.callee_offset;
1014 poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
1015 poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
1016 unsigned reg1 = frame.wb_candidate1;
1017@@ -9150,13 +9150,13 @@ aarch64_expand_epilogue (bool for_sibcall)
1018
1019 /* Restore the vector registers before the predicate registers,
1020 so that we can use P4 as a temporary for big-endian SVE frames. */
1021- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
1022+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
1023 callee_adjust != 0, &cfi_ops);
1024- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
1025+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
1026 false, &cfi_ops);
1027 if (maybe_ne (sve_callee_adjust, 0))
1028 aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
1029- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
1030+ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
1031 R0_REGNUM, R30_REGNUM,
1032 callee_adjust != 0, &cfi_ops);
1033
1034diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
1035index 21ac920a3fe..57e67217745 100644
1036--- a/gcc/config/aarch64/aarch64.h
1037+++ b/gcc/config/aarch64/aarch64.h
1038@@ -873,10 +873,6 @@ struct GTY (()) aarch64_frame
1039 It is zero when no push is used. */
1040 HOST_WIDE_INT callee_adjust;
1041
1042- /* The offset from SP to the callee-save registers after initial_adjust.
1043- It may be non-zero if no push is used (ie. callee_adjust == 0). */
1044- poly_int64 callee_offset;
1045-
1046 /* The size of the stack adjustment before saving or after restoring
1047 SVE registers. */
1048 poly_int64 sve_callee_adjust;
1049--
10502.34.1
1051
1052
1053From 7356df0319aefe4c68ef57ec4c6bd18c72188a34 Mon Sep 17 00:00:00 2001
1054From: Richard Sandiford <richard.sandiford@arm.com>
1055Date: Tue, 12 Sep 2023 16:19:46 +0100
1056Subject: [PATCH 07/19] aarch64: Only calculate chain_offset if there is a
1057 chain
1058
1059After previous patches, it is no longer necessary to calculate
1060a chain_offset in cases where there is no chain record.
1061
1062gcc/
1063 * config/aarch64/aarch64.c (aarch64_expand_prologue): Move the
1064 calculation of chain_offset into the emit_frame_chain block.
1065---
1066 gcc/config/aarch64/aarch64.c | 10 +++++-----
1067 1 file changed, 5 insertions(+), 5 deletions(-)
1068
1069diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1070index 96e99f6c17a..cf5244b7ec0 100644
1071--- a/gcc/config/aarch64/aarch64.c
1072+++ b/gcc/config/aarch64/aarch64.c
1073@@ -8974,16 +8974,16 @@ aarch64_expand_prologue (void)
1074 if (callee_adjust != 0)
1075 aarch64_push_regs (reg1, reg2, callee_adjust);
1076
1077- /* The offset of the frame chain record (if any) from the current SP. */
1078- poly_int64 chain_offset = (initial_adjust + callee_adjust
1079- - frame.hard_fp_offset);
1080- gcc_assert (known_ge (chain_offset, 0));
1081-
1082 /* The offset of the current SP from the bottom of the static frame. */
1083 poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
1084
1085 if (emit_frame_chain)
1086 {
1087+ /* The offset of the frame chain record (if any) from the current SP. */
1088+ poly_int64 chain_offset = (initial_adjust + callee_adjust
1089+ - frame.hard_fp_offset);
1090+ gcc_assert (known_ge (chain_offset, 0));
1091+
1092 if (callee_adjust == 0)
1093 {
1094 reg1 = R29_REGNUM;
1095--
10962.34.1
1097
1098
1099From 82fb69e75c21010f7afc72bb842751164fe8fc72 Mon Sep 17 00:00:00 2001
1100From: Richard Sandiford <richard.sandiford@arm.com>
1101Date: Tue, 12 Sep 2023 16:19:46 +0100
1102Subject: [PATCH 08/19] aarch64: Rename locals_offset to bytes_above_locals
1103MIME-Version: 1.0
1104Content-Type: text/plain; charset=UTF-8
1105Content-Transfer-Encoding: 8bit
1106
1107locals_offset was described as:
1108
1109 /* Offset from the base of the frame (incomming SP) to the
1110 top of the locals area. This value is always a multiple of
1111 STACK_BOUNDARY. */
1112
1113This is implicitly an “upside down” view of the frame: the incoming
1114SP is at offset 0, and anything N bytes below the incoming SP is at
1115offset N (rather than -N).
1116
1117However, reg_offset instead uses a “right way up” view; that is,
1118it views offsets in address terms. Something above X is at a
1119positive offset from X and something below X is at a negative
1120offset from X.
1121
1122Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
1123target-independent code views offsets in address terms too:
1124locals are allocated at negative offsets to virtual_stack_vars.
1125
1126It seems confusing to have *_offset fields of the same structure
1127using different polarities like this. This patch tries to avoid
1128that by renaming locals_offset to bytes_above_locals.
1129
1130gcc/
1131 * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
1132 (aarch64_frame::bytes_above_locals): ...this.
1133 * config/aarch64/aarch64.c (aarch64_layout_frame)
1134 (aarch64_initial_elimination_offset): Update accordingly.
1135---
1136 gcc/config/aarch64/aarch64.c | 6 +++---
1137 gcc/config/aarch64/aarch64.h | 6 +++---
1138 2 files changed, 6 insertions(+), 6 deletions(-)
1139
1140diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1141index cf5244b7ec0..d54f7a89306 100644
1142--- a/gcc/config/aarch64/aarch64.c
1143+++ b/gcc/config/aarch64/aarch64.c
1144@@ -7596,7 +7596,7 @@ aarch64_layout_frame (void)
1145 STACK_BOUNDARY / BITS_PER_UNIT));
1146 frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
1147
1148- frame.locals_offset = frame.saved_varargs_size;
1149+ frame.bytes_above_locals = frame.saved_varargs_size;
1150
1151 frame.initial_adjust = 0;
1152 frame.final_adjust = 0;
1153@@ -11758,13 +11758,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
1154 return frame.hard_fp_offset;
1155
1156 if (from == FRAME_POINTER_REGNUM)
1157- return frame.hard_fp_offset - frame.locals_offset;
1158+ return frame.hard_fp_offset - frame.bytes_above_locals;
1159 }
1160
1161 if (to == STACK_POINTER_REGNUM)
1162 {
1163 if (from == FRAME_POINTER_REGNUM)
1164- return frame.frame_size - frame.locals_offset;
1165+ return frame.frame_size - frame.bytes_above_locals;
1166 }
1167
1168 return frame.frame_size;
1169diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
1170index 57e67217745..3c5e3dd429d 100644
1171--- a/gcc/config/aarch64/aarch64.h
1172+++ b/gcc/config/aarch64/aarch64.h
1173@@ -851,10 +851,10 @@ struct GTY (()) aarch64_frame
1174 always a multiple of STACK_BOUNDARY. */
1175 poly_int64 bytes_below_hard_fp;
1176
1177- /* Offset from the base of the frame (incomming SP) to the
1178- top of the locals area. This value is always a multiple of
1179+ /* The number of bytes between the top of the locals area and the top
1180+ of the frame (the incomming SP). This value is always a multiple of
1181 STACK_BOUNDARY. */
1182- poly_int64 locals_offset;
1183+ poly_int64 bytes_above_locals;
1184
1185 /* Offset from the base of the frame (incomming SP) to the
1186 hard_frame_pointer. This value is always a multiple of
1187--
11882.34.1
1189
1190
1191From fa6600b55b49ee14d8288f13719ceea2a75eea60 Mon Sep 17 00:00:00 2001
1192From: Richard Sandiford <richard.sandiford@arm.com>
1193Date: Tue, 12 Sep 2023 16:19:47 +0100
1194Subject: [PATCH 09/19] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
1195MIME-Version: 1.0
1196Content-Type: text/plain; charset=UTF-8
1197Content-Transfer-Encoding: 8bit
1198
1199Similarly to the previous locals_offset patch, hard_fp_offset
1200was described as:
1201
1202 /* Offset from the base of the frame (incomming SP) to the
1203 hard_frame_pointer. This value is always a multiple of
1204 STACK_BOUNDARY. */
1205 poly_int64 hard_fp_offset;
1206
1207which again took an “upside-down” view: higher offsets meant lower
1208addresses. This patch renames the field to bytes_above_hard_fp instead.
1209
1210gcc/
1211 * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
1212 to...
1213 (aarch64_frame::bytes_above_hard_fp): ...this.
1214 * config/aarch64/aarch64.c (aarch64_layout_frame)
1215 (aarch64_expand_prologue): Update accordingly.
1216 (aarch64_initial_elimination_offset): Likewise.
1217---
1218 gcc/config/aarch64/aarch64.c | 26 +++++++++++++-------------
1219 gcc/config/aarch64/aarch64.h | 6 +++---
1220 2 files changed, 16 insertions(+), 16 deletions(-)
1221
1222diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1223index d54f7a89306..23cb084e5a7 100644
1224--- a/gcc/config/aarch64/aarch64.c
1225+++ b/gcc/config/aarch64/aarch64.c
1226@@ -7588,7 +7588,7 @@ aarch64_layout_frame (void)
1227 + get_frame_size (),
1228 STACK_BOUNDARY / BITS_PER_UNIT);
1229
1230- frame.hard_fp_offset
1231+ frame.bytes_above_hard_fp
1232 = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
1233
1234 /* Both these values are already aligned. */
1235@@ -7609,13 +7609,13 @@ aarch64_layout_frame (void)
1236 else if (frame.wb_candidate1 != INVALID_REGNUM)
1237 max_push_offset = 256;
1238
1239- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
1240+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
1241 HOST_WIDE_INT const_saved_regs_size;
1242 if (known_eq (frame.saved_regs_size, 0))
1243 frame.initial_adjust = frame.frame_size;
1244 else if (frame.frame_size.is_constant (&const_size)
1245 && const_size < max_push_offset
1246- && known_eq (frame.hard_fp_offset, const_size))
1247+ && known_eq (frame.bytes_above_hard_fp, const_size))
1248 {
1249 /* Simple, small frame with no data below the saved registers.
1250
1251@@ -7632,8 +7632,8 @@ aarch64_layout_frame (void)
1252 case that it hardly seems worth the effort though. */
1253 && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
1254 && !(cfun->calls_alloca
1255- && frame.hard_fp_offset.is_constant (&const_fp_offset)
1256- && const_fp_offset < max_push_offset))
1257+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp)
1258+ && const_above_fp < max_push_offset))
1259 {
1260 /* Frame with small area below the saved registers:
1261
1262@@ -7651,12 +7651,12 @@ aarch64_layout_frame (void)
1263 sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
1264 save SVE registers relative to SP
1265 sub sp, sp, bytes_below_saved_regs */
1266- frame.initial_adjust = (frame.hard_fp_offset
1267+ frame.initial_adjust = (frame.bytes_above_hard_fp
1268 + frame.below_hard_fp_saved_regs_size);
1269 frame.final_adjust = frame.bytes_below_saved_regs;
1270 }
1271- else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
1272- && const_fp_offset < max_push_offset)
1273+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
1274+ && const_above_fp < max_push_offset)
1275 {
1276 /* Frame with large area below the saved registers, or with SVE saves,
1277 but with a small area above:
1278@@ -7666,7 +7666,7 @@ aarch64_layout_frame (void)
1279 [sub sp, sp, below_hard_fp_saved_regs_size]
1280 [save SVE registers relative to SP]
1281 sub sp, sp, bytes_below_saved_regs */
1282- frame.callee_adjust = const_fp_offset;
1283+ frame.callee_adjust = const_above_fp;
1284 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
1285 frame.final_adjust = frame.bytes_below_saved_regs;
1286 }
1287@@ -7681,7 +7681,7 @@ aarch64_layout_frame (void)
1288 [sub sp, sp, below_hard_fp_saved_regs_size]
1289 [save SVE registers relative to SP]
1290 sub sp, sp, bytes_below_saved_regs */
1291- frame.initial_adjust = frame.hard_fp_offset;
1292+ frame.initial_adjust = frame.bytes_above_hard_fp;
1293 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
1294 frame.final_adjust = frame.bytes_below_saved_regs;
1295 }
1296@@ -8981,7 +8981,7 @@ aarch64_expand_prologue (void)
1297 {
1298 /* The offset of the frame chain record (if any) from the current SP. */
1299 poly_int64 chain_offset = (initial_adjust + callee_adjust
1300- - frame.hard_fp_offset);
1301+ - frame.bytes_above_hard_fp);
1302 gcc_assert (known_ge (chain_offset, 0));
1303
1304 if (callee_adjust == 0)
1305@@ -11755,10 +11755,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
1306 if (to == HARD_FRAME_POINTER_REGNUM)
1307 {
1308 if (from == ARG_POINTER_REGNUM)
1309- return frame.hard_fp_offset;
1310+ return frame.bytes_above_hard_fp;
1311
1312 if (from == FRAME_POINTER_REGNUM)
1313- return frame.hard_fp_offset - frame.bytes_above_locals;
1314+ return frame.bytes_above_hard_fp - frame.bytes_above_locals;
1315 }
1316
1317 if (to == STACK_POINTER_REGNUM)
1318diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
1319index 3c5e3dd429d..9291cfd3ec8 100644
1320--- a/gcc/config/aarch64/aarch64.h
1321+++ b/gcc/config/aarch64/aarch64.h
1322@@ -856,10 +856,10 @@ struct GTY (()) aarch64_frame
1323 STACK_BOUNDARY. */
1324 poly_int64 bytes_above_locals;
1325
1326- /* Offset from the base of the frame (incomming SP) to the
1327- hard_frame_pointer. This value is always a multiple of
1328+ /* The number of bytes between the hard_frame_pointer and the top of
1329+ the frame (the incomming SP). This value is always a multiple of
1330 STACK_BOUNDARY. */
1331- poly_int64 hard_fp_offset;
1332+ poly_int64 bytes_above_hard_fp;
1333
1334 /* The size of the frame. This value is the offset from base of the
1335 frame (incomming SP) to the stack_pointer. This value is always
1336--
13372.34.1
1338
1339
1340From b8cd5a0229da78c2d1289d54731fbef0126617d5 Mon Sep 17 00:00:00 2001
1341From: Richard Sandiford <richard.sandiford@arm.com>
1342Date: Tue, 12 Sep 2023 16:19:47 +0100
1343Subject: [PATCH 10/19] aarch64: Tweak frame_size comment
1344MIME-Version: 1.0
1345Content-Type: text/plain; charset=UTF-8
1346Content-Transfer-Encoding: 8bit
1347
1348This patch fixes another case in which a value was described with
1349an “upside-down” view.
1350
1351gcc/
1352 * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
1353---
1354 gcc/config/aarch64/aarch64.h | 4 ++--
1355 1 file changed, 2 insertions(+), 2 deletions(-)
1356
1357diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
1358index 9291cfd3ec8..82883ad5a0d 100644
1359--- a/gcc/config/aarch64/aarch64.h
1360+++ b/gcc/config/aarch64/aarch64.h
1361@@ -861,8 +861,8 @@ struct GTY (()) aarch64_frame
1362 STACK_BOUNDARY. */
1363 poly_int64 bytes_above_hard_fp;
1364
1365- /* The size of the frame. This value is the offset from base of the
1366- frame (incomming SP) to the stack_pointer. This value is always
1367+ /* The size of the frame, i.e. the number of bytes between the bottom
1368+ of the outgoing arguments and the incoming SP. This value is always
1369 a multiple of STACK_BOUNDARY. */
1370 poly_int64 frame_size;
1371
1372--
13732.34.1
1374
1375
1376From 999c4a81cffddb850d6ab0f6d3a8de3e704d2f7a Mon Sep 17 00:00:00 2001
1377From: Richard Sandiford <richard.sandiford@arm.com>
1378Date: Tue, 12 Sep 2023 16:19:48 +0100
1379Subject: [PATCH 11/19] aarch64: Measure reg_offset from the bottom of the
1380 frame
1381
1382reg_offset was measured from the bottom of the saved register area.
1383This made perfect sense with the original layout, since the bottom
1384of the saved register area was also the hard frame pointer address.
1385It became slightly less obvious with SVE, since we save SVE
1386registers below the hard frame pointer, but it still made sense.
1387
1388However, if we want to allow different frame layouts, it's more
1389convenient and obvious to measure reg_offset from the bottom of
1390the frame. After previous patches, it's also a slight simplification
1391in its own right.
1392
1393gcc/
1394 * config/aarch64/aarch64.h (aarch64_frame): Add comment above
1395 reg_offset.
1396 * config/aarch64/aarch64.c (aarch64_layout_frame): Walk offsets
1397 from the bottom of the frame, rather than the bottom of the saved
1398 register area. Measure reg_offset from the bottom of the frame
1399 rather than the bottom of the saved register area.
1400 (aarch64_save_callee_saves): Update accordingly.
1401 (aarch64_restore_callee_saves): Likewise.
1402 (aarch64_get_separate_components): Likewise.
1403 (aarch64_process_components): Likewise.
1404---
1405 gcc/config/aarch64/aarch64.c | 53 ++++++++++++++++--------------------
1406 gcc/config/aarch64/aarch64.h | 3 ++
1407 2 files changed, 27 insertions(+), 29 deletions(-)
1408
1409diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1410index 23cb084e5a7..45ff664cba6 100644
1411--- a/gcc/config/aarch64/aarch64.c
1412+++ b/gcc/config/aarch64/aarch64.c
1413@@ -7398,7 +7398,6 @@ aarch64_needs_frame_chain (void)
1414 static void
1415 aarch64_layout_frame (void)
1416 {
1417- poly_int64 offset = 0;
1418 int regno, last_fp_reg = INVALID_REGNUM;
1419 machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
1420 poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
1421@@ -7476,7 +7475,9 @@ aarch64_layout_frame (void)
1422 gcc_assert (crtl->is_leaf
1423 || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
1424
1425- frame.bytes_below_saved_regs = crtl->outgoing_args_size;
1426+ poly_int64 offset = crtl->outgoing_args_size;
1427+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
1428+ frame.bytes_below_saved_regs = offset;
1429
1430 /* Now assign stack slots for the registers. Start with the predicate
1431 registers, since predicate LDR and STR have a relatively small
1432@@ -7488,7 +7489,8 @@ aarch64_layout_frame (void)
1433 offset += BYTES_PER_SVE_PRED;
1434 }
1435
1436- if (maybe_ne (offset, 0))
1437+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
1438+ if (maybe_ne (saved_prs_size, 0))
1439 {
1440 /* If we have any vector registers to save above the predicate registers,
1441 the offset of the vector register save slots need to be a multiple
1442@@ -7506,10 +7508,10 @@ aarch64_layout_frame (void)
1443 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1444 else
1445 {
1446- if (known_le (offset, vector_save_size))
1447- offset = vector_save_size;
1448- else if (known_le (offset, vector_save_size * 2))
1449- offset = vector_save_size * 2;
1450+ if (known_le (saved_prs_size, vector_save_size))
1451+ offset = frame.bytes_below_saved_regs + vector_save_size;
1452+ else if (known_le (saved_prs_size, vector_save_size * 2))
1453+ offset = frame.bytes_below_saved_regs + vector_save_size * 2;
1454 else
1455 gcc_unreachable ();
1456 }
1457@@ -7526,9 +7528,10 @@ aarch64_layout_frame (void)
1458
1459 /* OFFSET is now the offset of the hard frame pointer from the bottom
1460 of the callee save area. */
1461- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
1462- frame.below_hard_fp_saved_regs_size = offset;
1463- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
1464+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
1465+ bool saves_below_hard_fp_p
1466+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
1467+ frame.bytes_below_hard_fp = offset;
1468 if (frame.emit_frame_chain)
1469 {
1470 /* FP and LR are placed in the linkage record. */
1471@@ -7579,9 +7582,10 @@ aarch64_layout_frame (void)
1472
1473 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1474
1475- frame.saved_regs_size = offset;
1476+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
1477
1478- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
1479+ poly_int64 varargs_and_saved_regs_size
1480+ = frame.saved_regs_size + frame.saved_varargs_size;
1481
1482 poly_int64 saved_regs_and_above
1483 = aligned_upper_bound (varargs_and_saved_regs_size
1484@@ -8021,9 +8025,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
1485
1486 machine_mode mode = aarch64_reg_save_mode (regno);
1487 reg = gen_rtx_REG (mode, regno);
1488- offset = (frame.reg_offset[regno]
1489- + frame.bytes_below_saved_regs
1490- - bytes_below_sp);
1491+ offset = frame.reg_offset[regno] - bytes_below_sp;
1492 rtx base_rtx = stack_pointer_rtx;
1493 poly_int64 sp_offset = offset;
1494
1495@@ -8130,9 +8132,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
1496
1497 machine_mode mode = aarch64_reg_save_mode (regno);
1498 reg = gen_rtx_REG (mode, regno);
1499- offset = (frame.reg_offset[regno]
1500- + frame.bytes_below_saved_regs
1501- - bytes_below_sp);
1502+ offset = frame.reg_offset[regno] - bytes_below_sp;
1503 rtx base_rtx = stack_pointer_rtx;
1504 if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
1505 aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
1506@@ -8271,14 +8271,12 @@ aarch64_get_separate_components (void)
1507 it as a stack probe for -fstack-clash-protection. */
1508 if (flag_stack_clash_protection
1509 && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
1510- && known_eq (offset, 0))
1511+ && known_eq (offset, frame.bytes_below_saved_regs))
1512 continue;
1513
1514 /* Get the offset relative to the register we'll use. */
1515 if (frame_pointer_needed)
1516- offset -= frame.below_hard_fp_saved_regs_size;
1517- else
1518- offset += frame.bytes_below_saved_regs;
1519+ offset -= frame.bytes_below_hard_fp;
1520
1521 /* Check that we can access the stack slot of the register with one
1522 direct load with no adjustments needed. */
1523@@ -8425,9 +8423,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
1524 rtx reg = gen_rtx_REG (mode, regno);
1525 poly_int64 offset = frame.reg_offset[regno];
1526 if (frame_pointer_needed)
1527- offset -= frame.below_hard_fp_saved_regs_size;
1528- else
1529- offset += frame.bytes_below_saved_regs;
1530+ offset -= frame.bytes_below_hard_fp;
1531
1532 rtx addr = plus_constant (Pmode, ptr_reg, offset);
1533 rtx mem = gen_frame_mem (mode, addr);
1534@@ -8479,9 +8475,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
1535 /* REGNO2 can be saved/restored in a pair with REGNO. */
1536 rtx reg2 = gen_rtx_REG (mode, regno2);
1537 if (frame_pointer_needed)
1538- offset2 -= frame.below_hard_fp_saved_regs_size;
1539- else
1540- offset2 += frame.bytes_below_saved_regs;
1541+ offset2 -= frame.bytes_below_hard_fp;
1542 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
1543 rtx mem2 = gen_frame_mem (mode, addr2);
1544 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
1545@@ -8597,7 +8591,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1546 if (final_adjustment_p
1547 && known_eq (frame.below_hard_fp_saved_regs_size, 0))
1548 {
1549- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
1550+ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
1551+ - frame.bytes_below_saved_regs);
1552 if (known_ge (lr_offset, 0))
1553 min_probe_threshold -= lr_offset.to_constant ();
1554 else
1555diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
1556index 82883ad5a0d..c8ec3d58495 100644
1557--- a/gcc/config/aarch64/aarch64.h
1558+++ b/gcc/config/aarch64/aarch64.h
1559@@ -826,6 +826,9 @@ extern enum aarch64_processor aarch64_tune;
1560 #ifdef HAVE_POLY_INT_H
1561 struct GTY (()) aarch64_frame
1562 {
1563+ /* The offset from the bottom of the static frame (the bottom of the
1564+ outgoing arguments) of each register save slot, or -2 if no save is
1565+ needed. */
1566 poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
1567
1568 /* The number of extra stack bytes taken up by register varargs.
1569--
15702.34.1
1571
1572
1573From 8b664cc8f05c8130e8ca73a59ae2751cdef8a0ea Mon Sep 17 00:00:00 2001
1574From: Richard Sandiford <richard.sandiford@arm.com>
1575Date: Tue, 12 Sep 2023 16:19:48 +0100
1576Subject: [PATCH 12/19] aarch64: Simplify top of frame allocation
1577
1578After previous patches, it no longer really makes sense to allocate
1579the top of the frame in terms of varargs_and_saved_regs_size and
1580saved_regs_and_above.
1581
1582gcc/
1583 * config/aarch64/aarch64.c (aarch64_layout_frame): Simplify
1584 the allocation of the top of the frame.
1585---
1586 gcc/config/aarch64/aarch64.c | 23 ++++++++---------------
1587 1 file changed, 8 insertions(+), 15 deletions(-)
1588
1589diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1590index 45ff664cba6..779547d0344 100644
1591--- a/gcc/config/aarch64/aarch64.c
1592+++ b/gcc/config/aarch64/aarch64.c
1593@@ -7584,23 +7584,16 @@ aarch64_layout_frame (void)
1594
1595 frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
1596
1597- poly_int64 varargs_and_saved_regs_size
1598- = frame.saved_regs_size + frame.saved_varargs_size;
1599-
1600- poly_int64 saved_regs_and_above
1601- = aligned_upper_bound (varargs_and_saved_regs_size
1602- + get_frame_size (),
1603- STACK_BOUNDARY / BITS_PER_UNIT);
1604-
1605- frame.bytes_above_hard_fp
1606- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
1607+ offset += get_frame_size ();
1608+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1609+ auto top_of_locals = offset;
1610
1611- /* Both these values are already aligned. */
1612- gcc_assert (multiple_p (frame.bytes_below_saved_regs,
1613- STACK_BOUNDARY / BITS_PER_UNIT));
1614- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
1615+ offset += frame.saved_varargs_size;
1616+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
1617+ frame.frame_size = offset;
1618
1619- frame.bytes_above_locals = frame.saved_varargs_size;
1620+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
1621+ frame.bytes_above_locals = frame.frame_size - top_of_locals;
1622
1623 frame.initial_adjust = 0;
1624 frame.final_adjust = 0;
1625--
16262.34.1
1627
1628
1629From bb4600071acc3a02db4f37ffb95c8495ad76a140 Mon Sep 17 00:00:00 2001
1630From: Richard Sandiford <richard.sandiford@arm.com>
1631Date: Tue, 12 Sep 2023 16:19:49 +0100
1632Subject: [PATCH 13/19] aarch64: Minor initial adjustment tweak
1633
1634This patch just changes a calculation of initial_adjust
1635to one that makes it slightly more obvious that the total
1636adjustment is frame.frame_size.
1637
1638gcc/
1639 * config/aarch64/aarch64.c (aarch64_layout_frame): Tweak
1640 calculation of initial_adjust for frames in which all saves
1641 are SVE saves.
1642---
1643 gcc/config/aarch64/aarch64.c | 5 ++---
1644 1 file changed, 2 insertions(+), 3 deletions(-)
1645
1646diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1647index 779547d0344..0b8992ada74 100644
1648--- a/gcc/config/aarch64/aarch64.c
1649+++ b/gcc/config/aarch64/aarch64.c
1650@@ -7645,11 +7645,10 @@ aarch64_layout_frame (void)
1651 {
1652 /* Frame in which all saves are SVE saves:
1653
1654- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
1655+ sub sp, sp, frame_size - bytes_below_saved_regs
1656 save SVE registers relative to SP
1657 sub sp, sp, bytes_below_saved_regs */
1658- frame.initial_adjust = (frame.bytes_above_hard_fp
1659- + frame.below_hard_fp_saved_regs_size);
1660+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
1661 frame.final_adjust = frame.bytes_below_saved_regs;
1662 }
1663 else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
1664--
16652.34.1
1666
1667
1668From f22329d5efbacf80edf4a2d45ebadd93f283252c Mon Sep 17 00:00:00 2001
1669From: Richard Sandiford <richard.sandiford@arm.com>
1670Date: Tue, 12 Sep 2023 16:19:49 +0100
1671Subject: [PATCH 14/19] aarch64: Tweak stack clash boundary condition
1672
1673The AArch64 ABI says that, when stack clash protection is used,
1674there can be a maximum of 1KiB of unprobed space at sp on entry
1675to a function. Therefore, we need to probe when allocating
1676>= guard_size - 1KiB of data (>= rather than >). This is what
1677GCC does.
1678
1679If an allocation is exactly guard_size bytes, it is enough to allocate
1680those bytes and probe once at offset 1024. It isn't possible to use a
1681single probe at any other offset: higher would complicate later code,
1682by leaving more unprobed space than usual, while lower would risk
1683leaving an entire page unprobed. For simplicity, the code probes all
1684allocations at offset 1024.
1685
1686Some register saves also act as probes. If we need to allocate
1687more space below the last such register save probe, we need to
1688probe the allocation if it is > 1KiB. Again, this allocation is
1689then sometimes (but not always) probed at offset 1024. This sort of
1690allocation is currently only used for outgoing arguments, which are
1691rarely this big.
1692
1693However, the code also probed if this final outgoing-arguments
1694allocation was == 1KiB, rather than just > 1KiB. This isn't
1695necessary, since the register save then probes at offset 1024
1696as required. Continuing to probe allocations of exactly 1KiB
1697would complicate later patches.
1698
1699gcc/
1700 * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
1701 Don't probe final allocations that are exactly 1KiB in size (after
1702 unprobed space above the final allocation has been deducted).
1703
1704gcc/testsuite/
1705 * gcc.target/aarch64/stack-check-prologue-17.c: New test.
1706---
1707 gcc/config/aarch64/aarch64.c | 4 +-
1708 .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++
1709 2 files changed, 58 insertions(+), 1 deletion(-)
1710 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1711
1712diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1713index 0b8992ada74..bfd24876195 100644
1714--- a/gcc/config/aarch64/aarch64.c
1715+++ b/gcc/config/aarch64/aarch64.c
1716@@ -8564,9 +8564,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1717 HOST_WIDE_INT guard_size
1718 = 1 << param_stack_clash_protection_guard_size;
1719 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
1720+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
1721+ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
1722 HOST_WIDE_INT min_probe_threshold
1723 = (final_adjustment_p
1724- ? guard_used_by_caller
1725+ ? guard_used_by_caller + byte_sp_alignment
1726 : guard_size - guard_used_by_caller);
1727 /* When doing the final adjustment for the outgoing arguments, take into
1728 account any unprobed space there is above the current SP. There are
1729diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1730new file mode 100644
1731index 00000000000..0d8a25d73a2
1732--- /dev/null
1733+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
1734@@ -0,0 +1,55 @@
1735+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
1736+/* { dg-final { check-function-bodies "**" "" } } */
1737+
1738+void f(int, ...);
1739+void g();
1740+
1741+/*
1742+** test1:
1743+** ...
1744+** str x30, \[sp\]
1745+** sub sp, sp, #1024
1746+** cbnz w0, .*
1747+** bl g
1748+** ...
1749+*/
1750+int test1(int z) {
1751+ __uint128_t x = 0;
1752+ int y[0x400];
1753+ if (z)
1754+ {
1755+ f(0, 0, 0, 0, 0, 0, 0, &y,
1756+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1757+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1758+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1759+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
1760+ }
1761+ g();
1762+ return 1;
1763+}
1764+
1765+/*
1766+** test2:
1767+** ...
1768+** str x30, \[sp\]
1769+** sub sp, sp, #1040
1770+** str xzr, \[sp\]
1771+** cbnz w0, .*
1772+** bl g
1773+** ...
1774+*/
1775+int test2(int z) {
1776+ __uint128_t x = 0;
1777+ int y[0x400];
1778+ if (z)
1779+ {
1780+ f(0, 0, 0, 0, 0, 0, 0, &y,
1781+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1782+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1783+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1784+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1785+ x);
1786+ }
1787+ g();
1788+ return 1;
1789+}
1790--
17912.34.1
1792
1793
1794From 174a9747491e591ef2abb3e20a0332303f11003a Mon Sep 17 00:00:00 2001
1795From: Richard Sandiford <richard.sandiford@arm.com>
1796Date: Tue, 12 Sep 2023 16:19:49 +0100
1797Subject: [PATCH 15/19] aarch64: Put LR save probe in first 16 bytes
1798
1799-fstack-clash-protection uses the save of LR as a probe for the next
1800allocation. The next allocation could be:
1801
1802* another part of the static frame, e.g. when allocating SVE save slots
1803 or outgoing arguments
1804
1805* an alloca in the same function
1806
1807* an allocation made by a callee function
1808
1809However, when -fomit-frame-pointer is used, the LR save slot is placed
1810above the other GPR save slots. It could therefore be up to 80 bytes
1811above the base of the GPR save area (which is also the hard fp address).
1812
1813aarch64_allocate_and_probe_stack_space took this into account when
1814deciding how much subsequent space could be allocated without needing
1815a probe. However, it interacted badly with:
1816
1817 /* If doing a small final adjustment, we always probe at offset 0.
1818 This is done to avoid issues when LR is not at position 0 or when
1819 the final adjustment is smaller than the probing offset. */
1820 else if (final_adjustment_p && rounded_size == 0)
1821 residual_probe_offset = 0;
1822
1823which forces any allocation that is smaller than the guard page size
1824to be probed at offset 0 rather than the usual offset 1024. It was
1825therefore possible to construct cases in which we had:
1826
1827* a probe using LR at SP + 80 bytes (or some other value >= 16)
1828* an allocation of the guard page size - 16 bytes
1829* a probe at SP + 0
1830
1831which allocates guard page size + 64 consecutive unprobed bytes.
1832
1833This patch requires the LR probe to be in the first 16 bytes of the
1834save area when stack clash protection is active. Doing it
1835unconditionally would cause code-quality regressions, but a later
1836patch deals with that.
1837
1838The new comment doesn't say that the probe register is required
1839to be LR, since a later patch removes that restriction.
1840
1841gcc/
1842 * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure that
1843 the LR save slot is in the first 16 bytes of the register save area.
1844 (aarch64_allocate_and_probe_stack_space): Remove workaround for
1845 when LR was not in the first 16 bytes.
1846
1847gcc/testsuite/
1848 * gcc.target/aarch64/stack-check-prologue-18.c: New test.
1849---
1850 gcc/config/aarch64/aarch64.c | 61 ++++-------
1851 .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
1852 2 files changed, 123 insertions(+), 38 deletions(-)
1853 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1854
1855diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
1856index bfd24876195..3f2b10de987 100644
1857--- a/gcc/config/aarch64/aarch64.c
1858+++ b/gcc/config/aarch64/aarch64.c
1859@@ -7532,26 +7532,34 @@ aarch64_layout_frame (void)
1860 bool saves_below_hard_fp_p
1861 = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
1862 frame.bytes_below_hard_fp = offset;
1863+
1864+ auto allocate_gpr_slot = [&](unsigned int regno)
1865+ {
1866+ frame.reg_offset[regno] = offset;
1867+ if (frame.wb_candidate1 == INVALID_REGNUM)
1868+ frame.wb_candidate1 = regno;
1869+ else if (frame.wb_candidate2 == INVALID_REGNUM)
1870+ frame.wb_candidate2 = regno;
1871+ offset += UNITS_PER_WORD;
1872+ };
1873+
1874 if (frame.emit_frame_chain)
1875 {
1876 /* FP and LR are placed in the linkage record. */
1877- frame.reg_offset[R29_REGNUM] = offset;
1878- frame.wb_candidate1 = R29_REGNUM;
1879- frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
1880- frame.wb_candidate2 = R30_REGNUM;
1881- offset += 2 * UNITS_PER_WORD;
1882+ allocate_gpr_slot (R29_REGNUM);
1883+ allocate_gpr_slot (R30_REGNUM);
1884 }
1885+ else if (flag_stack_clash_protection
1886+ && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
1887+ /* Put the LR save slot first, since it makes a good choice of probe
1888+ for stack clash purposes. The idea is that the link register usually
1889+ has to be saved before a call anyway, and so we lose little by
1890+ stopping it from being individually shrink-wrapped. */
1891+ allocate_gpr_slot (R30_REGNUM);
1892
1893 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1894 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
1895- {
1896- frame.reg_offset[regno] = offset;
1897- if (frame.wb_candidate1 == INVALID_REGNUM)
1898- frame.wb_candidate1 = regno;
1899- else if (frame.wb_candidate2 == INVALID_REGNUM)
1900- frame.wb_candidate2 = regno;
1901- offset += UNITS_PER_WORD;
1902- }
1903+ allocate_gpr_slot (regno);
1904
1905 poly_int64 max_int_offset = offset;
1906 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1907@@ -8570,29 +8578,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1908 = (final_adjustment_p
1909 ? guard_used_by_caller + byte_sp_alignment
1910 : guard_size - guard_used_by_caller);
1911- /* When doing the final adjustment for the outgoing arguments, take into
1912- account any unprobed space there is above the current SP. There are
1913- two cases:
1914-
1915- - When saving SVE registers below the hard frame pointer, we force
1916- the lowest save to take place in the prologue before doing the final
1917- adjustment (i.e. we don't allow the save to be shrink-wrapped).
1918- This acts as a probe at SP, so there is no unprobed space.
1919-
1920- - When there are no SVE register saves, we use the store of the link
1921- register as a probe. We can't assume that LR was saved at position 0
1922- though, so treat any space below it as unprobed. */
1923- if (final_adjustment_p
1924- && known_eq (frame.below_hard_fp_saved_regs_size, 0))
1925- {
1926- poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
1927- - frame.bytes_below_saved_regs);
1928- if (known_ge (lr_offset, 0))
1929- min_probe_threshold -= lr_offset.to_constant ();
1930- else
1931- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
1932- }
1933-
1934 poly_int64 frame_size = frame.frame_size;
1935
1936 /* We should always have a positive probe threshold. */
1937@@ -8772,8 +8757,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
1938 if (final_adjustment_p && rounded_size != 0)
1939 min_probe_threshold = 0;
1940 /* If doing a small final adjustment, we always probe at offset 0.
1941- This is done to avoid issues when LR is not at position 0 or when
1942- the final adjustment is smaller than the probing offset. */
1943+ This is done to avoid issues when the final adjustment is smaller
1944+ than the probing offset. */
1945 else if (final_adjustment_p && rounded_size == 0)
1946 residual_probe_offset = 0;
1947
1948diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1949new file mode 100644
1950index 00000000000..82447d20fff
1951--- /dev/null
1952+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
1953@@ -0,0 +1,100 @@
1954+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
1955+/* { dg-final { check-function-bodies "**" "" } } */
1956+
1957+void f(int, ...);
1958+void g();
1959+
1960+/*
1961+** test1:
1962+** ...
1963+** str x30, \[sp\]
1964+** sub sp, sp, #4064
1965+** str xzr, \[sp\]
1966+** cbnz w0, .*
1967+** bl g
1968+** ...
1969+** str x26, \[sp, #?4128\]
1970+** ...
1971+*/
1972+int test1(int z) {
1973+ __uint128_t x = 0;
1974+ int y[0x400];
1975+ if (z)
1976+ {
1977+ asm volatile ("" :::
1978+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
1979+ f(0, 0, 0, 0, 0, 0, 0, &y,
1980+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1981+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1982+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1983+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1984+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1985+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1986+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1987+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1988+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1989+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1990+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1991+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1992+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1993+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1994+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
1995+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
1996+ }
1997+ g();
1998+ return 1;
1999+}
2000+
2001+/*
2002+** test2:
2003+** ...
2004+** str x30, \[sp\]
2005+** sub sp, sp, #1040
2006+** str xzr, \[sp\]
2007+** cbnz w0, .*
2008+** bl g
2009+** ...
2010+*/
2011+int test2(int z) {
2012+ __uint128_t x = 0;
2013+ int y[0x400];
2014+ if (z)
2015+ {
2016+ asm volatile ("" :::
2017+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
2018+ f(0, 0, 0, 0, 0, 0, 0, &y,
2019+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2020+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2021+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2022+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2023+ x);
2024+ }
2025+ g();
2026+ return 1;
2027+}
2028+
2029+/*
2030+** test3:
2031+** ...
2032+** str x30, \[sp\]
2033+** sub sp, sp, #1024
2034+** cbnz w0, .*
2035+** bl g
2036+** ...
2037+*/
2038+int test3(int z) {
2039+ __uint128_t x = 0;
2040+ int y[0x400];
2041+ if (z)
2042+ {
2043+ asm volatile ("" :::
2044+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
2045+ f(0, 0, 0, 0, 0, 0, 0, &y,
2046+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2047+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2048+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
2049+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
2050+ }
2051+ g();
2052+ return 1;
2053+}
2054--
20552.34.1
2056
2057
2058From e932e11c353be52256dd30d30d924f4e834e3ca3 Mon Sep 17 00:00:00 2001
2059From: Richard Sandiford <richard.sandiford@arm.com>
2060Date: Tue, 12 Sep 2023 16:19:51 +0100
2061Subject: [PATCH 16/19] aarch64: Simplify probe of final frame allocation
2062
2063Previous patches ensured that the final frame allocation only needs
2064a probe when the size is strictly greater than 1KiB. It's therefore
2065safe to use the normal 1024 probe offset in all cases.
2066
2067The main motivation for doing this is to simplify the code and
2068reduce the number of special cases.
2069
2070gcc/
2071 * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
2072 Always probe the residual allocation at offset 1024, asserting
2073 that that is in range.
2074
2075gcc/testsuite/
2076 * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
2077 to be at offset 1024 rather than offset 0.
2078 * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
2079---
2080 gcc/config/aarch64/aarch64.c | 12 ++++--------
2081 .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
2082 .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++--
2083 3 files changed, 7 insertions(+), 11 deletions(-)
2084
2085diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
2086index 3f2b10de987..4b9cd687525 100644
2087--- a/gcc/config/aarch64/aarch64.c
2088+++ b/gcc/config/aarch64/aarch64.c
2089@@ -8751,16 +8751,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
2090 are still safe. */
2091 if (residual)
2092 {
2093- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
2094+ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
2095+
2096 /* If we're doing final adjustments, and we've done any full page
2097 allocations then any residual needs to be probed. */
2098 if (final_adjustment_p && rounded_size != 0)
2099 min_probe_threshold = 0;
2100- /* If doing a small final adjustment, we always probe at offset 0.
2101- This is done to avoid issues when the final adjustment is smaller
2102- than the probing offset. */
2103- else if (final_adjustment_p && rounded_size == 0)
2104- residual_probe_offset = 0;
2105
2106 aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
2107 if (residual >= min_probe_threshold)
2108@@ -8771,8 +8767,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
2109 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
2110 "\n", residual);
2111
2112- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
2113- residual_probe_offset));
2114+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
2115+ guard_used_by_caller));
2116 emit_insn (gen_blockage ());
2117 }
2118 }
2119diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
2120index 0d8a25d73a2..f0ec1389771 100644
2121--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
2122+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
2123@@ -33,7 +33,7 @@ int test1(int z) {
2124 ** ...
2125 ** str x30, \[sp\]
2126 ** sub sp, sp, #1040
2127-** str xzr, \[sp\]
2128+** str xzr, \[sp, #?1024\]
2129 ** cbnz w0, .*
2130 ** bl g
2131 ** ...
2132diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
2133index 82447d20fff..6383bec5ebc 100644
2134--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
2135+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
2136@@ -9,7 +9,7 @@ void g();
2137 ** ...
2138 ** str x30, \[sp\]
2139 ** sub sp, sp, #4064
2140-** str xzr, \[sp\]
2141+** str xzr, \[sp, #?1024\]
2142 ** cbnz w0, .*
2143 ** bl g
2144 ** ...
2145@@ -50,7 +50,7 @@ int test1(int z) {
2146 ** ...
2147 ** str x30, \[sp\]
2148 ** sub sp, sp, #1040
2149-** str xzr, \[sp\]
2150+** str xzr, \[sp, #?1024\]
2151 ** cbnz w0, .*
2152 ** bl g
2153 ** ...
2154--
21552.34.1
2156
2157
2158From 9ed9fd54b2b471745c9489e83496c091a7b64904 Mon Sep 17 00:00:00 2001
2159From: Richard Sandiford <richard.sandiford@arm.com>
2160Date: Tue, 12 Sep 2023 16:19:52 +0100
2161Subject: [PATCH 17/19] aarch64: Explicitly record probe registers in frame
2162 info
2163
2164The stack frame is currently divided into three areas:
2165
2166A: the area above the hard frame pointer
2167B: the SVE saves below the hard frame pointer
2168C: the outgoing arguments
2169
2170If the stack frame is allocated in one chunk, the allocation needs a
2171probe if the frame size is >= guard_size - 1KiB. In addition, if the
2172function is not a leaf function, it must probe an address no more than
21731KiB above the outgoing SP. We ensured the second condition by
2174
2175(1) using single-chunk allocations for non-leaf functions only if
2176 the link register save slot is within 512 bytes of the bottom
2177 of the frame; and
2178
2179(2) using the link register save as a probe (meaning, for instance,
2180 that it can't be individually shrink wrapped)
2181
2182If instead the stack is allocated in multiple chunks, then:
2183
2184* an allocation involving only the outgoing arguments (C above) requires
2185 a probe if the allocation size is > 1KiB
2186
2187* any other allocation requires a probe if the allocation size
2188 is >= guard_size - 1KiB
2189
2190* second and subsequent allocations require the previous allocation
2191 to probe at the bottom of the allocated area, regardless of the size
2192 of that previous allocation
2193
2194The final point means that, unlike for single allocations,
2195it can be necessary to have both a non-SVE register probe and
2196an SVE register probe. For example:
2197
2198* allocate A, probe using a non-SVE register save
2199* allocate B, probe using an SVE register save
2200* allocate C
2201
2202The non-SVE register used in this case was again the link register.
2203It was previously used even if the link register save slot was some
2204bytes above the bottom of the non-SVE register saves, but an earlier
2205patch avoided that by putting the link register save slot first.
2206
2207As a belt-and-braces fix, this patch explicitly records which
2208probe registers we're using and allows the non-SVE probe to be
2209whichever register comes first (as for SVE).
2210
2211The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c.
2212
2213gcc/
2214 * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe)
2215 (aarch64_frame::hard_fp_save_and_probe): New fields.
2216 * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize them.
2217 Rather than asserting that a leaf function saves LR, instead assert
2218 that a leaf function saves something.
2219 (aarch64_get_separate_components): Prevent the chosen probe
2220 registers from being individually shrink-wrapped.
2221 (aarch64_allocate_and_probe_stack_space): Remove workaround for
2222 probe registers that aren't at the bottom of the previous allocation.
2223
2224gcc/testsuite/
2225 * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes.
2226---
2227 gcc/config/aarch64/aarch64.c | 68 +++++++++++++++----
2228 gcc/config/aarch64/aarch64.h | 8 +++
2229 .../aarch64/sve/pcs/stack_clash_3.c | 6 +-
2230 3 files changed, 64 insertions(+), 18 deletions(-)
2231
2232diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
2233index 4b9cd687525..ef4b3b671ba 100644
2234--- a/gcc/config/aarch64/aarch64.c
2235+++ b/gcc/config/aarch64/aarch64.c
2236@@ -7469,15 +7469,11 @@ aarch64_layout_frame (void)
2237 && !crtl->abi->clobbers_full_reg_p (regno))
2238 frame.reg_offset[regno] = SLOT_REQUIRED;
2239
2240- /* With stack-clash, LR must be saved in non-leaf functions. The saving of
2241- LR counts as an implicit probe which allows us to maintain the invariant
2242- described in the comment at expand_prologue. */
2243- gcc_assert (crtl->is_leaf
2244- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
2245
2246 poly_int64 offset = crtl->outgoing_args_size;
2247 gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
2248 frame.bytes_below_saved_regs = offset;
2249+ frame.sve_save_and_probe = INVALID_REGNUM;
2250
2251 /* Now assign stack slots for the registers. Start with the predicate
2252 registers, since predicate LDR and STR have a relatively small
2253@@ -7485,6 +7481,8 @@ aarch64_layout_frame (void)
2254 for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
2255 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
2256 {
2257+ if (frame.sve_save_and_probe == INVALID_REGNUM)
2258+ frame.sve_save_and_probe = regno;
2259 frame.reg_offset[regno] = offset;
2260 offset += BYTES_PER_SVE_PRED;
2261 }
2262@@ -7522,6 +7520,8 @@ aarch64_layout_frame (void)
2263 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2264 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
2265 {
2266+ if (frame.sve_save_and_probe == INVALID_REGNUM)
2267+ frame.sve_save_and_probe = regno;
2268 frame.reg_offset[regno] = offset;
2269 offset += vector_save_size;
2270 }
2271@@ -7531,10 +7531,18 @@ aarch64_layout_frame (void)
2272 frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
2273 bool saves_below_hard_fp_p
2274 = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
2275+ gcc_assert (!saves_below_hard_fp_p
2276+ || (frame.sve_save_and_probe != INVALID_REGNUM
2277+ && known_eq (frame.reg_offset[frame.sve_save_and_probe],
2278+ frame.bytes_below_saved_regs)));
2279+
2280 frame.bytes_below_hard_fp = offset;
2281+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
2282
2283 auto allocate_gpr_slot = [&](unsigned int regno)
2284 {
2285+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
2286+ frame.hard_fp_save_and_probe = regno;
2287 frame.reg_offset[regno] = offset;
2288 if (frame.wb_candidate1 == INVALID_REGNUM)
2289 frame.wb_candidate1 = regno;
2290@@ -7568,6 +7576,8 @@ aarch64_layout_frame (void)
2291 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2292 if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
2293 {
2294+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
2295+ frame.hard_fp_save_and_probe = regno;
2296 /* If there is an alignment gap between integer and fp callee-saves,
2297 allocate the last fp register to it if possible. */
2298 if (regno == last_fp_reg
2299@@ -7591,6 +7601,17 @@ aarch64_layout_frame (void)
2300 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2301
2302 frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
2303+ gcc_assert (known_eq (frame.saved_regs_size,
2304+ frame.below_hard_fp_saved_regs_size)
2305+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM
2306+ && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
2307+ frame.bytes_below_hard_fp)));
2308+
2309+ /* With stack-clash, a register must be saved in non-leaf functions.
2310+ The saving of the bottommost register counts as an implicit probe,
2311+ which allows us to maintain the invariant described in the comment
2312+ at expand_prologue. */
2313+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
2314
2315 offset += get_frame_size ();
2316 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2317@@ -7690,6 +7711,25 @@ aarch64_layout_frame (void)
2318 frame.final_adjust = frame.bytes_below_saved_regs;
2319 }
2320
2321+ /* The frame is allocated in pieces, with each non-final piece
2322+ including a register save at offset 0 that acts as a probe for
2323+ the following piece. In addition, the save of the bottommost register
2324+ acts as a probe for callees and allocas. Roll back any probes that
2325+ aren't needed.
2326+
2327+ A probe isn't needed if it is associated with the final allocation
2328+ (including callees and allocas) that happens before the epilogue is
2329+ executed. */
2330+ if (crtl->is_leaf
2331+ && !cfun->calls_alloca
2332+ && known_eq (frame.final_adjust, 0))
2333+ {
2334+ if (maybe_ne (frame.sve_callee_adjust, 0))
2335+ frame.sve_save_and_probe = INVALID_REGNUM;
2336+ else
2337+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
2338+ }
2339+
2340 /* Make sure the individual adjustments add up to the full frame size. */
2341 gcc_assert (known_eq (frame.initial_adjust
2342 + frame.callee_adjust
2343@@ -8267,13 +8307,6 @@ aarch64_get_separate_components (void)
2344
2345 poly_int64 offset = frame.reg_offset[regno];
2346
2347- /* If the register is saved in the first SVE save slot, we use
2348- it as a stack probe for -fstack-clash-protection. */
2349- if (flag_stack_clash_protection
2350- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
2351- && known_eq (offset, frame.bytes_below_saved_regs))
2352- continue;
2353-
2354 /* Get the offset relative to the register we'll use. */
2355 if (frame_pointer_needed)
2356 offset -= frame.bytes_below_hard_fp;
2357@@ -8308,6 +8341,13 @@ aarch64_get_separate_components (void)
2358
2359 bitmap_clear_bit (components, LR_REGNUM);
2360 bitmap_clear_bit (components, SP_REGNUM);
2361+ if (flag_stack_clash_protection)
2362+ {
2363+ if (frame.sve_save_and_probe != INVALID_REGNUM)
2364+ bitmap_clear_bit (components, frame.sve_save_and_probe);
2365+ if (frame.hard_fp_save_and_probe != INVALID_REGNUM)
2366+ bitmap_clear_bit (components, frame.hard_fp_save_and_probe);
2367+ }
2368
2369 return components;
2370 }
2371@@ -8844,8 +8884,8 @@ aarch64_epilogue_uses (int regno)
2372 When probing is needed, we emit a probe at the start of the prologue
2373 and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
2374
2375- We have to track how much space has been allocated and the only stores
2376- to the stack we track as implicit probes are the FP/LR stores.
2377+ We can also use register saves as probes. The registers used are
2378+ recorded in sve_save_and_probe and hard_fp_save_and_probe.
2379
2380 For outgoing arguments we probe if the size is larger than 1KB, such that
2381 the ABI specified buffer is maintained for the next callee.
2382diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
2383index c8ec3d58495..97173e48598 100644
2384--- a/gcc/config/aarch64/aarch64.h
2385+++ b/gcc/config/aarch64/aarch64.h
2386@@ -911,6 +911,14 @@ struct GTY (()) aarch64_frame
2387 This is the register they should use. */
2388 unsigned spare_pred_reg;
2389
2390+ /* An SVE register that is saved below the hard frame pointer and that acts
2391+ as a probe for later allocations, or INVALID_REGNUM if none. */
2392+ unsigned sve_save_and_probe;
2393+
2394+ /* A register that is saved at the hard frame pointer and that acts
2395+ as a probe for later allocations, or INVALID_REGNUM if none. */
2396+ unsigned hard_fp_save_and_probe;
2397+
2398 bool laid_out;
2399 };
2400
2401diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
2402index 3e01ec36c3a..3530a0d504b 100644
2403--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
2404+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
2405@@ -11,11 +11,10 @@
2406 ** mov x11, sp
2407 ** ...
2408 ** sub sp, sp, x13
2409-** str p4, \[sp\]
2410 ** cbz w0, [^\n]*
2411+** str p4, \[sp\]
2412 ** ...
2413 ** ptrue p0\.b, all
2414-** ldr p4, \[sp\]
2415 ** addvl sp, sp, #1
2416 ** ldr x24, \[sp\], 32
2417 ** ret
2418@@ -39,13 +38,12 @@ test_1 (int n)
2419 ** mov x11, sp
2420 ** ...
2421 ** sub sp, sp, x13
2422-** str p4, \[sp\]
2423 ** cbz w0, [^\n]*
2424+** str p4, \[sp\]
2425 ** str p5, \[sp, #1, mul vl\]
2426 ** str p6, \[sp, #2, mul vl\]
2427 ** ...
2428 ** ptrue p0\.b, all
2429-** ldr p4, \[sp\]
2430 ** addvl sp, sp, #1
2431 ** ldr x24, \[sp\], 32
2432 ** ret
2433--
24342.34.1
2435
2436
2437From 4bbf7b6cdd02b0d547ddd6a630f2065680bf2f6b Mon Sep 17 00:00:00 2001
2438From: Richard Sandiford <richard.sandiford@arm.com>
2439Date: Tue, 12 Sep 2023 16:19:52 +0100
2440Subject: [PATCH 18/19] aarch64: Remove below_hard_fp_saved_regs_size
2441
2442After previous patches, it's no longer necessary to store
2443saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
2444All measurements instead use the top or bottom of the frame as
2445reference points.
2446
2447gcc/
2448 * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
2449 (aarch64_frame::below_hard_fp_saved_regs_size): Delete.
2450 * config/aarch64/aarch64.c (aarch64_layout_frame): Update accordingly.
2451---
2452 gcc/config/aarch64/aarch64.c | 45 +++++++++++++++++-------------------
2453 gcc/config/aarch64/aarch64.h | 7 ------
2454 2 files changed, 21 insertions(+), 31 deletions(-)
2455
2456diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
2457index ef4b3b671ba..385718a475b 100644
2458--- a/gcc/config/aarch64/aarch64.c
2459+++ b/gcc/config/aarch64/aarch64.c
2460@@ -7528,9 +7528,8 @@ aarch64_layout_frame (void)
2461
2462 /* OFFSET is now the offset of the hard frame pointer from the bottom
2463 of the callee save area. */
2464- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
2465- bool saves_below_hard_fp_p
2466- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
2467+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
2468+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
2469 gcc_assert (!saves_below_hard_fp_p
2470 || (frame.sve_save_and_probe != INVALID_REGNUM
2471 && known_eq (frame.reg_offset[frame.sve_save_and_probe],
2472@@ -7600,9 +7599,8 @@ aarch64_layout_frame (void)
2473
2474 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2475
2476- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
2477- gcc_assert (known_eq (frame.saved_regs_size,
2478- frame.below_hard_fp_saved_regs_size)
2479+ auto saved_regs_size = offset - frame.bytes_below_saved_regs;
2480+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
2481 || (frame.hard_fp_save_and_probe != INVALID_REGNUM
2482 && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
2483 frame.bytes_below_hard_fp)));
2484@@ -7611,7 +7609,7 @@ aarch64_layout_frame (void)
2485 The saving of the bottommost register counts as an implicit probe,
2486 which allows us to maintain the invariant described in the comment
2487 at expand_prologue. */
2488- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
2489+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
2490
2491 offset += get_frame_size ();
2492 offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2493@@ -7637,7 +7635,7 @@ aarch64_layout_frame (void)
2494
2495 HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
2496 HOST_WIDE_INT const_saved_regs_size;
2497- if (known_eq (frame.saved_regs_size, 0))
2498+ if (known_eq (saved_regs_size, 0))
2499 frame.initial_adjust = frame.frame_size;
2500 else if (frame.frame_size.is_constant (&const_size)
2501 && const_size < max_push_offset
2502@@ -7650,7 +7648,7 @@ aarch64_layout_frame (void)
2503 frame.callee_adjust = const_size;
2504 }
2505 else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
2506- && frame.saved_regs_size.is_constant (&const_saved_regs_size)
2507+ && saved_regs_size.is_constant (&const_saved_regs_size)
2508 && const_below_saved_regs + const_saved_regs_size < 512
2509 /* We could handle this case even with data below the saved
2510 registers, provided that that data left us with valid offsets
2511@@ -7669,8 +7667,7 @@ aarch64_layout_frame (void)
2512 frame.initial_adjust = frame.frame_size;
2513 }
2514 else if (saves_below_hard_fp_p
2515- && known_eq (frame.saved_regs_size,
2516- frame.below_hard_fp_saved_regs_size))
2517+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
2518 {
2519 /* Frame in which all saves are SVE saves:
2520
2521@@ -7692,7 +7689,7 @@ aarch64_layout_frame (void)
2522 [save SVE registers relative to SP]
2523 sub sp, sp, bytes_below_saved_regs */
2524 frame.callee_adjust = const_above_fp;
2525- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
2526+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
2527 frame.final_adjust = frame.bytes_below_saved_regs;
2528 }
2529 else
2530@@ -7707,7 +7704,7 @@ aarch64_layout_frame (void)
2531 [save SVE registers relative to SP]
2532 sub sp, sp, bytes_below_saved_regs */
2533 frame.initial_adjust = frame.bytes_above_hard_fp;
2534- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
2535+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
2536 frame.final_adjust = frame.bytes_below_saved_regs;
2537 }
2538
2539@@ -8849,17 +8846,17 @@ aarch64_epilogue_uses (int regno)
2540 | local variables | <-- frame_pointer_rtx
2541 | |
2542 +-------------------------------+
2543- | padding | \
2544- +-------------------------------+ |
2545- | callee-saved registers | | frame.saved_regs_size
2546- +-------------------------------+ |
2547- | LR' | |
2548- +-------------------------------+ |
2549- | FP' | |
2550- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned)
2551- | SVE vector registers | | \
2552- +-------------------------------+ | | below_hard_fp_saved_regs_size
2553- | SVE predicate registers | / /
2554+ | padding |
2555+ +-------------------------------+
2556+ | callee-saved registers |
2557+ +-------------------------------+
2558+ | LR' |
2559+ +-------------------------------+
2560+ | FP' |
2561+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned)
2562+ | SVE vector registers |
2563+ +-------------------------------+
2564+ | SVE predicate registers |
2565 +-------------------------------+
2566 | dynamic allocation |
2567 +-------------------------------+
2568diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
2569index 97173e48598..9084b1cfb9d 100644
2570--- a/gcc/config/aarch64/aarch64.h
2571+++ b/gcc/config/aarch64/aarch64.h
2572@@ -837,18 +837,11 @@ struct GTY (()) aarch64_frame
2573 STACK_BOUNDARY. */
2574 HOST_WIDE_INT saved_varargs_size;
2575
2576- /* The size of the callee-save registers with a slot in REG_OFFSET. */
2577- poly_int64 saved_regs_size;
2578-
2579 /* The number of bytes between the bottom of the static frame (the bottom
2580 of the outgoing arguments) and the bottom of the register save area.
2581 This value is always a multiple of STACK_BOUNDARY. */
2582 poly_int64 bytes_below_saved_regs;
2583
2584- /* The size of the callee-save registers with a slot in REG_OFFSET that
2585- are saved below the hard frame pointer. */
2586- poly_int64 below_hard_fp_saved_regs_size;
2587-
2588 /* The number of bytes between the bottom of the static frame (the bottom
2589 of the outgoing arguments) and the hard frame pointer. This value is
2590 always a multiple of STACK_BOUNDARY. */
2591--
25922.34.1
2593
2594
2595From bea0985749c12fcc264710586addb7838cc61e6d Mon Sep 17 00:00:00 2001
2596From: Richard Sandiford <richard.sandiford@arm.com>
2597Date: Tue, 12 Sep 2023 16:19:52 +0100
2598Subject: [PATCH 19/19] aarch64: Make stack smash canary protect saved
2599 registers
2600
2601AArch64 normally puts the saved registers near the bottom of the frame,
2602immediately above any dynamic allocations. But this means that a
2603stack-smash attack on those dynamic allocations could overwrite the
2604saved registers without needing to reach as far as the stack smash
2605canary.
2606
2607The same thing could also happen for variable-sized arguments that are
2608passed by value, since those are allocated before a call and popped on
2609return.
2610
2611This patch avoids that by putting the locals (and thus the canary) below
2612the saved registers when stack smash protection is active.
2613
2614The patch fixes CVE-2023-4039.
2615
2616gcc/
2617 * config/aarch64/aarch64.c (aarch64_save_regs_above_locals_p):
2618 New function.
2619 (aarch64_layout_frame): Use it to decide whether locals should
2620 go above or below the saved registers.
2621 (aarch64_expand_prologue): Update stack layout comment.
2622 Emit a stack tie after the final adjustment.
2623
2624gcc/testsuite/
2625 * gcc.target/aarch64/stack-protector-8.c: New test.
2626 * gcc.target/aarch64/stack-protector-9.c: Likewise.
2627---
2628 gcc/config/aarch64/aarch64.c | 46 +++++++--
2629 .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++
2630 .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++
2631 3 files changed, 168 insertions(+), 6 deletions(-)
2632 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
2633 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
2634
2635diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
2636index 385718a475b..3ccfd3c30fc 100644
2637--- a/gcc/config/aarch64/aarch64.c
2638+++ b/gcc/config/aarch64/aarch64.c
2639@@ -7392,6 +7392,20 @@ aarch64_needs_frame_chain (void)
2640 return aarch64_use_frame_pointer;
2641 }
2642
2643+/* Return true if the current function should save registers above
2644+ the locals area, rather than below it. */
2645+
2646+static bool
2647+aarch64_save_regs_above_locals_p ()
2648+{
2649+ /* When using stack smash protection, make sure that the canary slot
2650+ comes between the locals and the saved registers. Otherwise,
2651+ it would be possible for a carefully sized smash attack to change
2652+ the saved registers (particularly LR and FP) without reaching the
2653+ canary. */
2654+ return crtl->stack_protect_guard;
2655+}
2656+
2657 /* Mark the registers that need to be saved by the callee and calculate
2658 the size of the callee-saved registers area and frame record (both FP
2659 and LR may be omitted). */
2660@@ -7403,6 +7417,7 @@ aarch64_layout_frame (void)
2661 poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
2662 bool frame_related_fp_reg_p = false;
2663 aarch64_frame &frame = cfun->machine->frame;
2664+ poly_int64 top_of_locals = -1;
2665
2666 frame.emit_frame_chain = aarch64_needs_frame_chain ();
2667
2668@@ -7469,9 +7484,16 @@ aarch64_layout_frame (void)
2669 && !crtl->abi->clobbers_full_reg_p (regno))
2670 frame.reg_offset[regno] = SLOT_REQUIRED;
2671
2672+ bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
2673
2674 poly_int64 offset = crtl->outgoing_args_size;
2675 gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
2676+ if (regs_at_top_p)
2677+ {
2678+ offset += get_frame_size ();
2679+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2680+ top_of_locals = offset;
2681+ }
2682 frame.bytes_below_saved_regs = offset;
2683 frame.sve_save_and_probe = INVALID_REGNUM;
2684
2685@@ -7611,15 +7633,18 @@ aarch64_layout_frame (void)
2686 at expand_prologue. */
2687 gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
2688
2689- offset += get_frame_size ();
2690- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2691- auto top_of_locals = offset;
2692-
2693+ if (!regs_at_top_p)
2694+ {
2695+ offset += get_frame_size ();
2696+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2697+ top_of_locals = offset;
2698+ }
2699 offset += frame.saved_varargs_size;
2700 gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
2701 frame.frame_size = offset;
2702
2703 frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
2704+ gcc_assert (known_ge (top_of_locals, 0));
2705 frame.bytes_above_locals = frame.frame_size - top_of_locals;
2706
2707 frame.initial_adjust = 0;
2708@@ -8843,10 +8868,10 @@ aarch64_epilogue_uses (int regno)
2709 | for register varargs |
2710 | |
2711 +-------------------------------+
2712- | local variables | <-- frame_pointer_rtx
2713+ | local variables (1) | <-- frame_pointer_rtx
2714 | |
2715 +-------------------------------+
2716- | padding |
2717+ | padding (1) |
2718 +-------------------------------+
2719 | callee-saved registers |
2720 +-------------------------------+
2721@@ -8858,6 +8883,10 @@ aarch64_epilogue_uses (int regno)
2722 +-------------------------------+
2723 | SVE predicate registers |
2724 +-------------------------------+
2725+ | local variables (2) |
2726+ +-------------------------------+
2727+ | padding (2) |
2728+ +-------------------------------+
2729 | dynamic allocation |
2730 +-------------------------------+
2731 | padding |
2732@@ -8867,6 +8896,9 @@ aarch64_epilogue_uses (int regno)
2733 +-------------------------------+
2734 | | <-- stack_pointer_rtx (aligned)
2735
2736+ The regions marked (1) and (2) are mutually exclusive. (2) is used
2737+ when aarch64_save_regs_above_locals_p is true.
2738+
2739 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2740 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2741 unchanged.
2742@@ -9058,6 +9090,8 @@ aarch64_expand_prologue (void)
2743 gcc_assert (known_eq (bytes_below_sp, final_adjust));
2744 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
2745 !frame_pointer_needed, true);
2746+ if (emit_frame_chain && maybe_ne (final_adjust, 0))
2747+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2748 }
2749
2750 /* Return TRUE if we can use a simple_return insn.
2751diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
2752new file mode 100644
2753index 00000000000..e71d820e365
2754--- /dev/null
2755+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
2756@@ -0,0 +1,95 @@
2757+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
2758+/* { dg-final { check-function-bodies "**" "" } } */
2759+
2760+void g(void *);
2761+__SVBool_t *h(void *);
2762+
2763+/*
2764+** test1:
2765+** sub sp, sp, #288
2766+** stp x29, x30, \[sp, #?272\]
2767+** add x29, sp, #?272
2768+** mrs (x[0-9]+), tpidr2_el0
2769+** ldr (x[0-9]+), \[\1, #?16\]
2770+** str \2, \[sp, #?264\]
2771+** mov \2, #?0
2772+** add x0, sp, #?8
2773+** bl g
2774+** ...
2775+** mrs .*
2776+** ...
2777+** bne .*
2778+** ...
2779+** ldp x29, x30, \[sp, #?272\]
2780+** add sp, sp, #?288
2781+** ret
2782+** bl __stack_chk_fail
2783+*/
2784+int test1() {
2785+ int y[0x40];
2786+ g(y);
2787+ return 1;
2788+}
2789+
2790+/*
2791+** test2:
2792+** stp x29, x30, \[sp, #?-16\]!
2793+** mov x29, sp
2794+** sub sp, sp, #1040
2795+** mrs (x[0-9]+), tpidr2_el0
2796+** ldr (x[0-9]+), \[\1, #?16\]
2797+** str \2, \[sp, #?1032\]
2798+** mov \2, #?0
2799+** add x0, sp, #?8
2800+** bl g
2801+** ...
2802+** mrs .*
2803+** ...
2804+** bne .*
2805+** ...
2806+** add sp, sp, #?1040
2807+** ldp x29, x30, \[sp\], #?16
2808+** ret
2809+** bl __stack_chk_fail
2810+*/
2811+int test2() {
2812+ int y[0x100];
2813+ g(y);
2814+ return 1;
2815+}
2816+
2817+#pragma GCC target "+sve"
2818+
2819+/*
2820+** test3:
2821+** stp x29, x30, \[sp, #?-16\]!
2822+** mov x29, sp
2823+** addvl sp, sp, #-18
2824+** ...
2825+** str p4, \[sp\]
2826+** ...
2827+** sub sp, sp, #272
2828+** mrs (x[0-9]+), tpidr2_el0
2829+** ldr (x[0-9]+), \[\1, #?16\]
2830+** str \2, \[sp, #?264\]
2831+** mov \2, #?0
2832+** add x0, sp, #?8
2833+** bl h
2834+** ...
2835+** mrs .*
2836+** ...
2837+** bne .*
2838+** ...
2839+** add sp, sp, #?272
2840+** ...
2841+** ldr p4, \[sp\]
2842+** ...
2843+** addvl sp, sp, #18
2844+** ldp x29, x30, \[sp\], #?16
2845+** ret
2846+** bl __stack_chk_fail
2847+*/
2848+__SVBool_t test3() {
2849+ int y[0x40];
2850+ return *h(y);
2851+}
2852diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
2853new file mode 100644
2854index 00000000000..58f322aa480
2855--- /dev/null
2856+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
2857@@ -0,0 +1,33 @@
2858+/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
2859+/* { dg-final { check-function-bodies "**" "" } } */
2860+
2861+/*
2862+** main:
2863+** ...
2864+** stp x29, x30, \[sp, #?-[0-9]+\]!
2865+** ...
2866+** sub sp, sp, #[0-9]+
2867+** ...
2868+** str x[0-9]+, \[x29, #?-8\]
2869+** ...
2870+*/
2871+int f(const char *);
2872+void g(void *);
2873+int main(int argc, char* argv[])
2874+{
2875+ int a;
2876+ int b;
2877+ char c[2+f(argv[1])];
2878+ int d[0x100];
2879+ char y;
2880+
2881+ y=42; a=4; b=10;
2882+ c[0] = 'h'; c[1] = '\0';
2883+
2884+ c[f(argv[2])] = '\0';
2885+
2886+ __builtin_printf("%d %d\n%s\n", a, b, c);
2887+ g(d);
2888+
2889+ return 0;
2890+}
2891--
28922.34.1
2893
diff --git a/meta/recipes-devtools/gcc/gcc_11.4.bb b/meta/recipes-devtools/gcc/gcc_11.5.bb
index 255fe552bd..255fe552bd 100644
--- a/meta/recipes-devtools/gcc/gcc_11.4.bb
+++ b/meta/recipes-devtools/gcc/gcc_11.5.bb
diff --git a/meta/recipes-devtools/gcc/libgcc-initial_11.4.bb b/meta/recipes-devtools/gcc/libgcc-initial_11.5.bb
index a259082b47..a259082b47 100644
--- a/meta/recipes-devtools/gcc/libgcc-initial_11.4.bb
+++ b/meta/recipes-devtools/gcc/libgcc-initial_11.5.bb
diff --git a/meta/recipes-devtools/gcc/libgcc_11.4.bb b/meta/recipes-devtools/gcc/libgcc_11.5.bb
index f88963b0a4..f88963b0a4 100644
--- a/meta/recipes-devtools/gcc/libgcc_11.4.bb
+++ b/meta/recipes-devtools/gcc/libgcc_11.5.bb
diff --git a/meta/recipes-devtools/gcc/libgfortran_11.4.bb b/meta/recipes-devtools/gcc/libgfortran_11.5.bb
index 71dd8b4bdc..71dd8b4bdc 100644
--- a/meta/recipes-devtools/gcc/libgfortran_11.4.bb
+++ b/meta/recipes-devtools/gcc/libgfortran_11.5.bb