diff options
Diffstat (limited to 'meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Removing-the-Assembly-implementatio.patch')
| -rw-r--r-- | meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Removing-the-Assembly-implementatio.patch | 341 |
1 files changed, 341 insertions, 0 deletions
diff --git a/meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Removing-the-Assembly-implementatio.patch b/meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Removing-the-Assembly-implementatio.patch new file mode 100644 index 00000000..2a3d8632 --- /dev/null +++ b/meta-microblaze/recipes-core/newlib/files/0009-Patch-MicroBlaze-Removing-the-Assembly-implementatio.patch | |||
| @@ -0,0 +1,341 @@ | |||
| 1 | From 5a7955b68f5066b00413e751d1de967181c88b94 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Mahesh Bodapati <mbodapat@xilinx.com> | ||
| 3 | Date: Tue, 9 Nov 2021 22:53:44 +0530 | ||
| 4 | Subject: [PATCH 09/11] [Patch,MicroBlaze] : Removing the Assembly | ||
| 5 | implementation of 64bit string function. | ||
| 6 | |||
| 7 | Use the C implementation on 64-bit for now; revisit and fix the assembly implementation in the next release. | |
| 8 | |||
| 9 | (cherry picked from commit 6b8e5c7a773de4609f9c855aa714eca5a3f8b4ab) | ||
| 10 | --- | ||
| 11 | newlib/libc/machine/microblaze/mb_endian.h | 4 + | ||
| 12 | newlib/libc/machine/microblaze/strcmp.c | 95 ++++++++-------------- | ||
| 13 | newlib/libc/machine/microblaze/strcpy.c | 82 ++++++------------- | ||
| 14 | newlib/libc/machine/microblaze/strlen.c | 59 +++++--------- | ||
| 15 | 4 files changed, 82 insertions(+), 158 deletions(-) | ||
| 16 | |||
| 17 | diff --git a/newlib/libc/machine/microblaze/mb_endian.h b/newlib/libc/machine/microblaze/mb_endian.h | ||
| 18 | index fb217ec85..17772c88f 100644 | ||
| 19 | --- a/newlib/libc/machine/microblaze/mb_endian.h | ||
| 20 | +++ b/newlib/libc/machine/microblaze/mb_endian.h | ||
| 21 | @@ -8,8 +8,12 @@ | ||
| 22 | #ifdef __LITTLE_ENDIAN__ | ||
| 23 | #define LOAD4BYTES(rD,rA,rB) "\tlwr\t" rD ", " rA ", " rB "\n" | ||
| 24 | #define STORE4BYTES(rD,rA,rB) "\tswr\t" rD ", " rA ", " rB "\n" | ||
| 25 | +#define LOAD8BYTES(rD,rA,rB) "\tllr\t" rD ", " rA ", " rB "\n" | ||
| 26 | +#define STORE8BYTES(rD,rA,rB) "\tslr\t" rD ", " rA ", " rB "\n" | ||
| 27 | #else | ||
| 28 | #define LOAD4BYTES(rD,rA,rB) "\tlw\t" rD ", " rA ", " rB "\n" | ||
| 29 | #define STORE4BYTES(rD,rA,rB) "\tsw\t" rD ", " rA ", " rB "\n" | ||
| 30 | +#define LOAD8BYTES(rD,rA,rB) "\tll\t" rD ", " rA ", " rB "\n" | ||
| 31 | +#define STORE8BYTES(rD,rA,rB) "\tsl\t" rD ", " rA ", " rB "\n" | ||
| 32 | #endif | ||
| 33 | #endif | ||
| 34 | diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c | ||
| 35 | index 2cfef7388..e34c64a0c 100644 | ||
| 36 | --- a/newlib/libc/machine/microblaze/strcmp.c | ||
| 37 | +++ b/newlib/libc/machine/microblaze/strcmp.c | ||
| 38 | @@ -129,70 +129,42 @@ strcmp (const char *s1, | ||
| 39 | return (*(unsigned char *) s1) - (*(unsigned char *) s2); | ||
| 40 | #endif /* not PREFER_SIZE_OVER_SPEED */ | ||
| 41 | |||
| 42 | +#elif __arch64__ | ||
| 43 | + unsigned int *a1; | ||
| 44 | + unsigned int *a2; | ||
| 45 | + | ||
| 46 | + /* If s1 or s2 is unaligned, then compare byte by byte. */ | |
| 47 | + if (!UNALIGNED (s1, s2)) | ||
| 48 | + { | ||
| 49 | + /* If s1 and s2 are word-aligned, compare them a word at a time. */ | ||
| 50 | + a1 = (unsigned int*)s1; | ||
| 51 | + a2 = (unsigned int*)s2; | ||
| 52 | + while (*a1 == *a2) | ||
| 53 | + { | ||
| 54 | + /* To get here, *a1 == *a2, thus if we find a null in *a1, | ||
| 55 | + then the strings must be equal, so return zero. */ | ||
| 56 | + if (DETECTNULL (*a1)) | ||
| 57 | + return 0; | ||
| 58 | + | ||
| 59 | + a1++; | ||
| 60 | + a2++; | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + /* The words at a1 and a2 differ, so find the differing byte with a bytewise search. */ | |
| 64 | + s1 = (char*)a1; | ||
| 65 | + s2 = (char*)a2; | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + while (*s1 != '\0' && *s1 == *s2) | ||
| 69 | + { | ||
| 70 | + s1++; | ||
| 71 | + s2++; | ||
| 72 | + } | ||
| 73 | + return (*(unsigned char *) s1) - (*(unsigned char *) s2); | ||
| 74 | #else | ||
| 75 | |||
| 76 | #include "mb_endian.h" | ||
| 77 | |||
| 78 | -#ifdef __arch64__ | ||
| 79 | - asm volatile (" \n\ | ||
| 80 | - orl r9, r0, r0 /* Index register */ \n\ | ||
| 81 | -check_alignment: \n\ | ||
| 82 | - andli r3, r5, 3 \n\ | ||
| 83 | - andli r4, r6, 3 \n\ | ||
| 84 | - beanei r3, try_align_args \n\ | ||
| 85 | - beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\ | ||
| 86 | -cmp_loop: \n" | ||
| 87 | - LOAD4BYTES("r3", "r5", "r9") | ||
| 88 | - LOAD4BYTES("r4", "r6", "r9") | ||
| 89 | -" \n\ | ||
| 90 | - pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\ | ||
| 91 | - beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\ | ||
| 92 | - cmplu r7, r4, r3 /* ELSE compare whole word */ \n\ | ||
| 93 | - beanei r7, end_cmp \n\ | ||
| 94 | - addlik r9, r9, 4 /* delay slot */ \n\ | ||
| 95 | - breaid cmp_loop \n\ | ||
| 96 | - nop /* delay slot */ \n\ | ||
| 97 | -end_cmp_loop: \n\ | ||
| 98 | - lbu r3, r5, r9 /* byte compare loop */ \n\ | ||
| 99 | - lbu r4, r6, r9 \n\ | ||
| 100 | - cmplu r7, r4, r3 /* Compare bytes */ \n\ | ||
| 101 | - beanei r7, end_cmp_early \n\ | ||
| 102 | - addlik r9, r9, 1 /* delay slot */ \n\ | ||
| 103 | - beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\ | ||
| 104 | - nop \n\ | ||
| 105 | -end_cmp_early: \n\ | ||
| 106 | - orl r3, r0, r7 /* delay slot */ \n\ | ||
| 107 | - rtsd r15, 8 \n\ | ||
| 108 | - nop \n\ | ||
| 109 | -try_align_args: \n\ | ||
| 110 | - xorl r7, r4, r3 \n\ | ||
| 111 | - beanei r7, regular_strcmp /* cannot align args */ \n\ | ||
| 112 | - rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\ | ||
| 113 | -align_loop: \n\ | ||
| 114 | - lbu r3, r5, r9 \n\ | ||
| 115 | - lbu r4, r6, r9 \n\ | ||
| 116 | - cmplu r7, r4, r3 \n\ | ||
| 117 | - beanei r7, end_cmp \n\ | ||
| 118 | - beaeqi r3, end_cmp \n\ | ||
| 119 | - addlik r10, r10, -1 \n\ | ||
| 120 | - addlik r9, r9, 1 \n\ | ||
| 121 | - beaeqid r10, cmp_loop \n\ | ||
| 122 | - nop \n\ | ||
| 123 | - breai align_loop \n\ | ||
| 124 | -regular_strcmp: \n\ | ||
| 125 | - lbu r3, r5, r9 \n\ | ||
| 126 | - lbu r4, r6, r9 \n\ | ||
| 127 | - cmplu r7, r4, r3 \n\ | ||
| 128 | - beanei r7, end_cmp \n\ | ||
| 129 | - beaeqi r3, end_cmp \n\ | ||
| 130 | - addlik r9, r9, 1 \n\ | ||
| 131 | - breaid regular_strcmp \n\ | ||
| 132 | - nop \n\ | ||
| 133 | -end_cmp: \n\ | ||
| 134 | - orl r3, r0, r7 \n\ | ||
| 135 | - rtsd r15, 8 \n\ | ||
| 136 | - nop /* Return strcmp result */"); | ||
| 137 | -#else | ||
| 138 | asm volatile (" \n\ | ||
| 139 | or r9, r0, r0 /* Index register */\n\ | ||
| 140 | check_alignment: \n\ | ||
| 141 | @@ -241,12 +213,11 @@ regular_strcmp: | ||
| 142 | bnei r7, end_cmp \n\ | ||
| 143 | beqi r3, end_cmp \n\ | ||
| 144 | brid regular_strcmp \n\ | ||
| 145 | - addik r9, r9, 1 | ||
| 146 | + addik r9, r9, 1 \n\ | ||
| 147 | end_cmp: \n\ | ||
| 148 | rtsd r15, 8 \n\ | ||
| 149 | or r3, r0, r7 /* Return strcmp result */"); | ||
| 150 | |||
| 151 | -#endif | ||
| 152 | #endif /* ! HAVE_HW_PCMP */ | ||
| 153 | } | ||
| 154 | |||
| 155 | diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c | ||
| 156 | index 6dbc60d77..ddb69227e 100644 | ||
| 157 | --- a/newlib/libc/machine/microblaze/strcpy.c | ||
| 158 | +++ b/newlib/libc/machine/microblaze/strcpy.c | ||
| 159 | @@ -121,67 +121,36 @@ strcpy (char *__restrict dst0, | ||
| 160 | ; | ||
| 161 | return dst0; | ||
| 162 | #endif /* not PREFER_SIZE_OVER_SPEED */ | ||
| 163 | +#elif __arch64__ | ||
| 164 | + char *dst = dst0; | ||
| 165 | + const char *src = src0; | ||
| 166 | + long *aligned_dst; | ||
| 167 | + const long *aligned_src; | ||
| 168 | |||
| 169 | -#else | ||
| 170 | + /* If SRC or DEST is unaligned, then copy bytes. */ | ||
| 171 | + if (!UNALIGNED (src, dst)) | ||
| 172 | + { | ||
| 173 | + aligned_dst = (long*)dst; | ||
| 174 | + aligned_src = (long*)src; | ||
| 175 | |||
| 176 | -#include "mb_endian.h" | ||
| 177 | -#ifdef __arch64__ | ||
| 178 | + /* SRC and DEST are both "long int" aligned, try to do "long int" | ||
| 179 | + sized copies. */ | ||
| 180 | + while (!DETECTNULL(*aligned_src)) | ||
| 181 | + { | ||
| 182 | + *aligned_dst++ = *aligned_src++; | ||
| 183 | + } | ||
| 184 | |||
| 185 | - asm volatile (" \n\ | ||
| 186 | - orl r9, r0, r0 /* Index register */ \n\ | ||
| 187 | -check_alignment: \n\ | ||
| 188 | - andli r3, r5, 3 \n\ | ||
| 189 | - andli r4, r6, 3 \n\ | ||
| 190 | - beanei r3, try_align_args \n\ | ||
| 191 | - beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\ | ||
| 192 | -cpy_loop: \n" | ||
| 193 | - LOAD4BYTES("r3", "r6", "r9") | ||
| 194 | -" \n\ | ||
| 195 | - pcmplbf r4, r0, r3 \n\ | ||
| 196 | - beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n" | ||
| 197 | - STORE4BYTES("r3", "r5", "r9") | ||
| 198 | -" \n\ | ||
| 199 | - addlik r9, r9, 4 \n\ | ||
| 200 | - breaid cpy_loop \n\ | ||
| 201 | - nop \n\ | ||
| 202 | -cpy_bytes: \n\ | ||
| 203 | - lbu r3, r6, r9 \n\ | ||
| 204 | - sb r3, r5, r9 \n\ | ||
| 205 | - addlik r4, r4, -1 \n\ | ||
| 206 | - addlik r9, r9, 1 /* delay slot */\n\ | ||
| 207 | - beaneid r4, cpy_bytes \n\ | ||
| 208 | - nop \n\ | ||
| 209 | -cpy_null: \n\ | ||
| 210 | - orl r3, r0, r5 /* Return strcpy result */\n\ | ||
| 211 | - rtsd r15, 8 \n\ | ||
| 212 | - nop \n\ | ||
| 213 | -try_align_args: \n\ | ||
| 214 | - xorl r7, r4, r3 \n\ | ||
| 215 | - beanei r7, regular_strcpy /* cannot align args */\n\ | ||
| 216 | - rsublik r10, r3, 4 /* Number of initial bytes to align */\n\ | ||
| 217 | -align_loop: \n\ | ||
| 218 | - lbu r3, r6, r9 \n\ | ||
| 219 | - sb r3, r5, r9 \n\ | ||
| 220 | - addlik r10, r10, -1 \n\ | ||
| 221 | - beaeqid r3, end_cpy /* Break if we have seen null character */\n\ | ||
| 222 | - nop \n\ | ||
| 223 | - addlik r9, r9, 1 \n\ | ||
| 224 | - beaneid r10, align_loop \n\ | ||
| 225 | - nop \n\ | ||
| 226 | - breai cpy_loop \n\ | ||
| 227 | -regular_strcpy: \n\ | ||
| 228 | - lbu r3, r6, r9 \n\ | ||
| 229 | - sb r3, r5, r9 \n\ | ||
| 230 | - addlik r9, r9, 1 \n\ | ||
| 231 | - beaneid r3, regular_strcpy \n\ | ||
| 232 | - nop \n\ | ||
| 233 | -end_cpy: \n\ | ||
| 234 | - orl r3, r0, r5 \n\ | ||
| 235 | - rtsd r15, 8 \n\ | ||
| 236 | - nop /* Return strcpy result */"); | ||
| 237 | + dst = (char*)aligned_dst; | ||
| 238 | + src = (char*)aligned_src; | ||
| 239 | + } | ||
| 240 | |||
| 241 | -#else | ||
| 242 | + while (*dst++ = *src++) | ||
| 243 | + ; | ||
| 244 | + return dst0; | ||
| 245 | + | ||
| 246 | +#else | ||
| 247 | |||
| 248 | +#include "mb_endian.h" | ||
| 249 | asm volatile (" \n\ | ||
| 250 | or r9, r0, r0 /* Index register */ \n\ | ||
| 251 | check_alignment: \n\ | ||
| 252 | @@ -227,7 +196,6 @@ regular_strcpy: \n\ | ||
| 253 | end_cpy: \n\ | ||
| 254 | rtsd r15, 8 \n\ | ||
| 255 | or r3, r0, r5 /* Return strcpy result */"); | ||
| 256 | -#endif | ||
| 257 | #endif /* ! HAVE_HW_PCMP */ | ||
| 258 | } | ||
| 259 | |||
| 260 | diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c | ||
| 261 | index b6f2d3c13..940753996 100644 | ||
| 262 | --- a/newlib/libc/machine/microblaze/strlen.c | ||
| 263 | +++ b/newlib/libc/machine/microblaze/strlen.c | ||
| 264 | @@ -112,47 +112,29 @@ strlen (const char *str) | ||
| 265 | return str - start; | ||
| 266 | #endif /* not PREFER_SIZE_OVER_SPEED */ | ||
| 267 | |||
| 268 | -#else | ||
| 269 | - | ||
| 270 | -#include "mb_endian.h" | ||
| 271 | +#elif __arch64__ | ||
| 272 | + const char *start = str; | ||
| 273 | + unsigned long *aligned_addr; | ||
| 274 | |||
| 275 | -#ifdef __arch64__ | ||
| 276 | - asm volatile (" \n\ | ||
| 277 | - orl r9, r0, r0 /* Index register */ \n\ | ||
| 278 | -check_alignment: \n\ | ||
| 279 | - andli r3, r5, 3 \n\ | ||
| 280 | - beanei r3, align_arg \n\ | ||
| 281 | -len_loop: \n" | ||
| 282 | - LOAD4BYTES("r3", "r5", "r9") | ||
| 283 | -" \n\ | ||
| 284 | - pcmplbf r4, r3, r0 \n\ | ||
| 285 | - beanei r4, end_len \n\ | ||
| 286 | - addlik r9, r9, 4 \n\ | ||
| 287 | - breaid len_loop \n\ | ||
| 288 | - nop \n\ | ||
| 289 | -end_len: \n\ | ||
| 290 | - lbu r3, r5, r9 \n\ | ||
| 291 | - beaeqi r3, done_len \n\ | ||
| 292 | - addlik r9, r9, 1 \n\ | ||
| 293 | - breaid end_len \n\ | ||
| 294 | - nop \n\ | ||
| 295 | -done_len: \n\ | ||
| 296 | - orl r3, r0, r9 /* Return len */ \n\ | ||
| 297 | - rtsd r15, 8 \n\ | ||
| 298 | - nop \n\ | ||
| 299 | -align_arg: \n\ | ||
| 300 | - rsublik r10, r3, 4 \n\ | ||
| 301 | -align_loop: \n\ | ||
| 302 | - lbu r3, r5, r9 \n\ | ||
| 303 | - addlik r10, r10, -1 \n\ | ||
| 304 | - beaeqid r3, done_len \n\ | ||
| 305 | - nop \n\ | ||
| 306 | - addlik r9, r9, 1 \n\ | ||
| 307 | - beaneid r10, align_loop \n\ | ||
| 308 | - nop \n\ | ||
| 309 | - breai len_loop"); | ||
| 310 | + if (!UNALIGNED (str)) | ||
| 311 | + { | ||
| 312 | + /* If the string is word-aligned, we can check for the presence of | ||
| 313 | + a null in each word-sized block. */ | ||
| 314 | + aligned_addr = (unsigned long*)str; | ||
| 315 | + while (!DETECTNULL (*aligned_addr)) | ||
| 316 | + aligned_addr++; | ||
| 317 | |||
| 318 | + /* Once a null is detected, we check each byte in that block for a | ||
| 319 | + precise position of the null. */ | ||
| 320 | + str = (char*)aligned_addr; | ||
| 321 | + } | ||
| 322 | + | ||
| 323 | + while (*str) | ||
| 324 | + str++; | ||
| 325 | + return str - start; | ||
| 326 | #else | ||
| 327 | + | ||
| 328 | +#include "mb_endian.h" | ||
| 329 | asm volatile (" \n\ | ||
| 330 | or r9, r0, r0 /* Index register */ \n\ | ||
| 331 | check_alignment: \n\ | ||
| 332 | @@ -183,6 +165,5 @@ align_loop: \n\ | ||
| 333 | addik r9, r9, 1 \n\ | ||
| 334 | bri len_loop"); | ||
| 335 | |||
| 336 | -#endif | ||
| 337 | #endif /* ! HAVE_HW_PCMP */ | ||
| 338 | } | ||
| 339 | -- | ||
| 340 | 2.34.1 | ||
| 341 | |||
