diff options
author | pgowda <pgowda.cve@gmail.com> | 2021-12-23 04:42:06 -0800 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2022-01-07 23:21:34 +0000 |
commit | f54df0078ed180b74bb199eafa9784222eecd89c (patch) | |
tree | b1c5a84114963881bfe91e78de7daec66cb32315 /meta | |
parent | 63206f475829172af6f2d93ba821562abceec7cf (diff) | |
download | poky-f54df0078ed180b74bb199eafa9784222eecd89c.tar.gz |
binutils: CVE-2021-42574
Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5]
(From OE-Core rev: 944a60cd74ea90dcced7684492a808fbfd6710af)
Signed-off-by: pgowda <pgowda.cve@gmail.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta')
-rw-r--r-- | meta/recipes-devtools/binutils/binutils-2.36.inc | 1 | ||||
-rw-r--r-- | meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch | 2006 |
2 files changed, 2007 insertions, 0 deletions
diff --git a/meta/recipes-devtools/binutils/binutils-2.36.inc b/meta/recipes-devtools/binutils/binutils-2.36.inc index 7d0824e060..e4fdadc70a 100644 --- a/meta/recipes-devtools/binutils/binutils-2.36.inc +++ b/meta/recipes-devtools/binutils/binutils-2.36.inc | |||
@@ -46,5 +46,6 @@ SRC_URI = "\ | |||
46 | file://0003-CVE-2021-20197.patch \ | 46 | file://0003-CVE-2021-20197.patch \ |
47 | file://0017-CVE-2021-3530.patch \ | 47 | file://0017-CVE-2021-3530.patch \ |
48 | file://0018-CVE-2021-3530.patch \ | 48 | file://0018-CVE-2021-3530.patch \ |
49 | file://0001-CVE-2021-42574.patch \ | ||
49 | " | 50 | " |
50 | S = "${WORKDIR}/git" | 51 | S = "${WORKDIR}/git" |
diff --git a/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch b/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch new file mode 100644 index 0000000000..c93fd770b3 --- /dev/null +++ b/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch | |||
@@ -0,0 +1,2006 @@ | |||
1 | From b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5 Mon Sep 17 00:00:00 2001 | ||
2 | From: Nick Clifton <nickc@redhat.com> | ||
3 | Date: Tue, 9 Nov 2021 13:25:42 +0000 | ||
4 | Subject: [PATCH] Add --unicode option to control how unicode characters are | ||
5 | handled by display tools. | ||
6 | |||
7 | * nm.c: Add --unicode option to control how unicode characters are | ||
8 | handled. | ||
9 | * objdump.c: Likewise. | ||
10 | * readelf.c: Likewise. | ||
11 | * strings.c: Likewise. | ||
12 | * binutils.texi: Document the new feature. | ||
13 | * NEWS: Document the new feature. | ||
14 | * testsuite/binutils-all/unicode.exp: New file. | ||
15 | * testsuite/binutils-all/nm.hex.unicode | ||
16 | * testsuite/binutils-all/strings.escape.unicode | ||
17 | * testsuite/binutils-all/objdump.highlight.unicode | ||
18 | * testsuite/binutils-all/readelf.invalid.unicode | ||
19 | |||
20 | CVE: CVE-2021-42574 | ||
21 | Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5] | ||
22 | |||
23 | Signed-off-by: pgowda <pgowda.cve@gmail.com> | ||
24 | --- | ||
25 | binutils/ChangeLog | 15 + | ||
26 | binutils/NEWS | 9 + | ||
27 | binutils/doc/binutils.texi | 78 ++++ | ||
28 | binutils/nm.c | 228 ++++++++++- | ||
29 | binutils/objdump.c | 235 ++++++++++-- | ||
30 | binutils/readelf.c | 190 +++++++++- | ||
31 | binutils/strings.c | 757 ++++++++++++++++++++++++++++++++++--- | ||
32 | 7 files changed, 1409 insertions(+), 103 deletions(-) | ||
33 | |||
34 | diff --git a/binutils/ChangeLog b/binutils/ChangeLog | ||
35 | --- a/binutils/ChangeLog 2021-12-23 03:23:38.425843662 -0800 | ||
36 | +++ b/binutils/ChangeLog 2021-12-23 03:30:31.094968942 -0800 | ||
37 | @@ -1,3 +1,18 @@ | ||
38 | +2021-11-09 Nick Clifton <nickc@redhat.com> | ||
39 | + | ||
40 | + * nm.c: Add --unicode option to control how unicode characters are | ||
41 | + handled. | ||
42 | + * objdump.c: Likewise. | ||
43 | + * readelf.c: Likewise. | ||
44 | + * strings.c: Likewise. | ||
45 | + * binutils.texi: Document the new feature. | ||
46 | + * NEWS: Document the new feature. | ||
47 | + * testsuite/binutils-all/unicode.exp: New file. | ||
48 | + * testsuite/binutils-all/nm.hex.unicode | ||
49 | + * testsuite/binutils-all/strings.escape.unicode | ||
50 | + * testsuite/binutils-all/objdump.highlight.unicode | ||
51 | + * testsuite/binutils-all/readelf.invalid.unicode | ||
52 | + | ||
53 | 2021-02-09 Alan Modra <amodra@gmail.com> | ||
54 | |||
55 | PR 27382 | ||
56 | diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi | ||
57 | --- a/binutils/doc/binutils.texi 2021-12-23 03:23:38.441843395 -0800 | ||
58 | +++ b/binutils/doc/binutils.texi 2021-12-23 03:30:31.094968942 -0800 | ||
59 | @@ -805,6 +805,7 @@ nm [@option{-A}|@option{-o}|@option{--pr | ||
60 | [@option{-s}|@option{--print-armap}] [@option{-t} @var{radix}|@option{--radix=}@var{radix}] | ||
61 | [@option{-u}|@option{--undefined-only}] [@option{-V}|@option{--version}] | ||
62 | [@option{-X 32_64}] [@option{--defined-only}] [@option{--no-demangle}] | ||
63 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
64 | [@option{--plugin} @var{name}] | ||
65 | [@option{--no-recurse-limit}|@option{--recurse-limit}]] | ||
66 | [@option{--size-sort}] [@option{--special-syms}] | ||
67 | @@ -1114,6 +1115,21 @@ Use @var{radix} as the radix for printin | ||
68 | @cindex undefined symbols | ||
69 | Display only undefined symbols (those external to each object file). | ||
70 | |||
71 | +@item -U @var{[d|i|l|e|x|h]} | ||
72 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
73 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
74 | +The default (@option{--unicode=default}) is to give them no special | ||
75 | +treatment. The @option{--unicode=locale} option displays the sequence | ||
76 | +in the current locale, which may or may not support them. The options | ||
77 | +@option{--unicode=hex} and @option{--unicode=invalid} display them as | ||
78 | +hex byte sequences enclosed by either angle brackets or curly braces. | ||
79 | + | ||
80 | +The @option{--unicode=escape} option displays them as escape sequences | ||
81 | +(@var{\uxxxx}) and the @option{--unicode=highlight} option displays | ||
82 | +them as escape sequences highlighted in red (if supported by the | ||
83 | +output device). The colouring is intended to draw attention to the | ||
84 | +presence of unicode sequences where they might not be expected. | ||
85 | + | ||
86 | @item -V | ||
87 | @itemx --version | ||
88 | Show the version number of @command{nm} and exit. | ||
89 | @@ -2210,6 +2226,7 @@ objdump [@option{-a}|@option{--archive-h | ||
90 | [@option{--prefix-strip=}@var{level}] | ||
91 | [@option{--insn-width=}@var{width}] | ||
92 | [@option{--visualize-jumps[=color|=extended-color|=off]} | ||
93 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
94 | [@option{-V}|@option{--version}] | ||
95 | [@option{-H}|@option{--help}] | ||
96 | @var{objfile}@dots{} | ||
97 | @@ -2877,6 +2894,21 @@ When displaying symbols include those wh | ||
98 | special in some way and which would not normally be of interest to the | ||
99 | user. | ||
100 | |||
101 | +@item -U @var{[d|i|l|e|x|h]} | ||
102 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
103 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
104 | +The default (@option{--unicode=default}) is to give them no special | ||
105 | +treatment. The @option{--unicode=locale} option displays the sequence | ||
106 | +in the current locale, which may or may not support them. The options | ||
107 | +@option{--unicode=hex} and @option{--unicode=invalid} display them as | ||
108 | +hex byte sequences enclosed by either angle brackets or curly braces. | ||
109 | + | ||
110 | +The @option{--unicode=escape} option displays them as escape sequences | ||
111 | +(@var{\uxxxx}) and the @option{--unicode=highlight} option displays | ||
112 | +them as escape sequences highlighted in red (if supported by the | ||
113 | +output device). The colouring is intended to draw attention to the | ||
114 | +presence of unicode sequences where they might not be expected. | ||
115 | + | ||
116 | @item -V | ||
117 | @itemx --version | ||
118 | Print the version number of @command{objdump} and exit. | ||
119 | @@ -3153,6 +3185,7 @@ strings [@option{-afovV}] [@option{-}@va | ||
120 | [@option{-n} @var{min-len}] [@option{--bytes=}@var{min-len}] | ||
121 | [@option{-t} @var{radix}] [@option{--radix=}@var{radix}] | ||
122 | [@option{-e} @var{encoding}] [@option{--encoding=}@var{encoding}] | ||
123 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
124 | [@option{-}] [@option{--all}] [@option{--print-file-name}] | ||
125 | [@option{-T} @var{bfdname}] [@option{--target=}@var{bfdname}] | ||
126 | [@option{-w}] [@option{--include-all-whitespace}] | ||
127 | @@ -3244,6 +3277,28 @@ single-8-bit-byte characters, @samp{b} = | ||
128 | littleendian. Useful for finding wide character strings. (@samp{l} | ||
129 | and @samp{b} apply to, for example, Unicode UTF-16/UCS-2 encodings). | ||
130 | |||
131 | +@item -U @var{[d|i|l|e|x|h]} | ||
132 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
133 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
134 | +The default (@option{--unicode=default}) is to give them no special | ||
135 | +treatment, and instead rely upon the setting of the | ||
136 | +@option{--encoding} option. The other values for this option | ||
137 | +automatically enable @option{--encoding=S}. | ||
138 | + | ||
139 | +The @option{--unicode=invalid} option treats them as non-graphic | ||
140 | +characters and hence not part of a valid string. All the remaining | ||
141 | +options treat them as valid string characters. | ||
142 | + | ||
143 | +The @option{--unicode=locale} option displays them in the current | ||
144 | +locale, which may or may not support UTF-8 encoding. The | ||
145 | +@option{--unicode=hex} option displays them as hex byte sequences | ||
146 | +enclosed between @var{<>} characters. The @option{--unicode=escape} | ||
147 | +option displays them as escape sequences (@var{\uxxxx}) and the | ||
148 | +@option{--unicode=highlight} option displays them as escape sequences | ||
149 | +highlighted in red (if supported by the output device). The colouring | ||
150 | +is intended to draw attention to the presence of unicode sequences | ||
151 | +where they might not be expected. | ||
152 | + | ||
153 | @item -T @var{bfdname} | ||
154 | @itemx --target=@var{bfdname} | ||
155 | @cindex object code format | ||
156 | @@ -4740,6 +4795,7 @@ readelf [@option{-a}|@option{--all}] | ||
157 | [@option{--dyn-syms}|@option{--lto-syms}] | ||
158 | [@option{--demangle@var{=style}}|@option{--no-demangle}] | ||
159 | [@option{--recurse-limit}|@option{--no-recurse-limit}] | ||
160 | + [@option{-U} @var{method}|@option{--unicode=}@var{method}] | ||
161 | [@option{-n}|@option{--notes}] | ||
162 | [@option{-r}|@option{--relocs}] | ||
163 | [@option{-u}|@option{--unwind}] | ||
164 | @@ -4887,6 +4943,28 @@ necessary in order to demangle truly com | ||
165 | that if the recursion limit is disabled then stack exhaustion is | ||
166 | possible and any bug reports about such an event will be rejected. | ||
167 | |||
168 | +@item -U @var{[d|i|l|e|x|h]} | ||
169 | +@itemx --unicode=[default|invalid|locale|escape|hex|highlight] | ||
170 | +Controls the display of non-ASCII characters in identifier names. | ||
171 | +The default (@option{--unicode=locale} or @option{--unicode=default}) is | ||
172 | +to treat them as multibyte characters and display them in the current | ||
173 | +locale. All other versions of this option treat the bytes as UTF-8 | ||
174 | +encoded values and attempt to interpret them. If they cannot be | ||
175 | +interpreted or if the @option{--unicode=invalid} option is used then | ||
176 | +they are displayed as a sequence of hex bytes, enclosed in curly | ||
177 | +brace characters. | ||
178 | + | ||
179 | +Using the @option{--unicode=escape} option will display the characters | ||
180 | +as unicode escape sequences (@var{\uxxxx}). Using the | ||
181 | +@option{--unicode=hex} will display the characters as hex byte | ||
182 | +sequences enclosed between angle brackets. | ||
183 | + | ||
184 | +Using the @option{--unicode=highlight} will display the characters as | ||
185 | +unicode escape sequences but it will also highlight them in red, | ||
186 | +assuming that colouring is supported by the output device. The | ||
187 | +colouring is intended to draw attention to the presence of unicode | ||
188 | +sequences when they might not be expected. | ||
189 | + | ||
190 | @item -e | ||
191 | @itemx --headers | ||
192 | Display all the headers in the file. Equivalent to @option{-h -l -S}. | ||
193 | diff --git a/binutils/NEWS b/binutils/NEWS | ||
194 | --- a/binutils/NEWS 2021-12-23 03:23:38.433843528 -0800 | ||
195 | +++ b/binutils/NEWS 2021-12-23 03:30:31.094968942 -0800 | ||
196 | @@ -1,5 +1,14 @@ | ||
197 | -*- text -*- | ||
198 | |||
199 | +* Tools which display symbols or strings (readelf, strings, nm, objdump) | ||
200 | + have a new command line option which controls how unicode characters are | ||
201 | + handled. By default they are treated as normal for the tool. Using | ||
202 | + --unicode=locale will display them according to the current locale. | ||
203 | + Using --unicode=hex will display them as hex byte values, whilst | ||
204 | + --unicode=escape will display them as escape sequences. In addition | ||
205 | + using --unicode=highlight will display them as unicode escape sequences | ||
206 | + highlighted in red (if supported by the output device). | ||
207 | + | ||
208 | Changes in 2.36: | ||
209 | |||
210 | * Update elfedit and readelf with LAM_U48 and LAM_U57 support. | ||
211 | diff --git a/binutils/nm.c b/binutils/nm.c | ||
212 | --- a/binutils/nm.c 2021-12-23 03:23:38.441843395 -0800 | ||
213 | +++ b/binutils/nm.c 2021-12-23 03:30:31.098968875 -0800 | ||
214 | @@ -38,6 +38,11 @@ | ||
215 | #include "bucomm.h" | ||
216 | #include "plugin-api.h" | ||
217 | #include "plugin.h" | ||
218 | +#include "safe-ctype.h" | ||
219 | + | ||
220 | +#ifndef streq | ||
221 | +#define streq(a,b) (strcmp ((a),(b)) == 0) | ||
222 | +#endif | ||
223 | |||
224 | /* When sorting by size, we use this structure to hold the size and a | ||
225 | pointer to the minisymbol. */ | ||
226 | @@ -192,6 +197,18 @@ static const char *plugin_target = NULL; | ||
227 | static bfd *lineno_cache_bfd; | ||
228 | static bfd *lineno_cache_rel_bfd; | ||
229 | |||
230 | +typedef enum unicode_display_type /* How UTF-8 multibyte sequences in symbol names are rendered. */ | ||
231 | +{ | ||
232 | + unicode_default = 0, /* No conversion: print_symname leaves the name untouched. */ | ||
233 | + unicode_locale, /* Copy the bytes of the sequence to the output as is. */ | ||
234 | + unicode_escape, /* Print the sequence as a \u escape. */ | ||
235 | + unicode_hex, /* Print the bytes in hex between '<' and '>'. */ | ||
236 | + unicode_highlight, /* As unicode_escape, wrapped in red colour codes when isatty (1). */ | ||
237 | + unicode_invalid /* Print the bytes in hex between '{' and '}'. */ | ||
238 | +} unicode_display_type; | ||
239 | + | ||
240 | +static unicode_display_type unicode_display = unicode_default; /* Set by the -U/--unicode option in main. */ | ||
241 | + | ||
242 | enum long_option_values | ||
243 | { | ||
244 | OPTION_TARGET = 200, | ||
245 | @@ -234,6 +251,7 @@ static struct option long_options[] = | ||
246 | {"target", required_argument, 0, OPTION_TARGET}, | ||
247 | {"defined-only", no_argument, &defined_only, 1}, | ||
248 | {"undefined-only", no_argument, &undefined_only, 1}, | ||
249 | + {"unicode", required_argument, NULL, 'U'}, | ||
250 | {"version", no_argument, &show_version, 1}, | ||
251 | {"with-symbol-versions", no_argument, NULL, | ||
252 | OPTION_WITH_SYMBOL_VERSIONS}, | ||
253 | @@ -285,6 +303,8 @@ usage (FILE *stream, int status) | ||
254 | -t, --radix=RADIX Use RADIX for printing symbol values\n\ | ||
255 | --target=BFDNAME Specify the target object format as BFDNAME\n\ | ||
256 | -u, --undefined-only Display only undefined symbols\n\ | ||
257 | + -U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ | ||
258 | + --unicode={default|show|invalid|hex|escape|highlight}\n\ | ||
259 | --with-symbol-versions Display version strings after symbol names\n\ | ||
260 | -X 32_64 (ignored)\n\ | ||
261 | @FILE Read options from FILE\n\ | ||
262 | @@ -399,6 +419,188 @@ get_coff_symbol_type (const struct inter | ||
263 | fatal ("%s", xstrerror (errno)); | ||
264 | return bufp; | ||
265 | } | ||
266 | + | ||
267 | +/* Convert a potential UTF-8 encoded sequence in IN into characters in OUT. | ||
268 | + The conversion format is controlled by the unicode_display variable. | ||
269 | + Returns the number of characters added to OUT. | ||
270 | + Returns the number of bytes consumed from IN in CONSUMED. | ||
271 | + Always consumes at least one byte and displays at least one character. */ | ||
272 | + | ||
273 | +static unsigned int | ||
274 | +display_utf8 (const unsigned char * in, char * out, unsigned int * consumed) | ||
275 | +{ | ||
276 | + char * orig_out = out; | ||
277 | + unsigned int nchars = 0; | ||
278 | + unsigned int j; | ||
279 | + | ||
280 | + if (unicode_display == unicode_default) | ||
281 | + goto invalid; | ||
282 | + | ||
283 | + if (in[0] < 0xc0) | ||
284 | + goto invalid; | ||
285 | + | ||
286 | + if ((in[1] & 0xc0) != 0x80) | ||
287 | + goto invalid; | ||
288 | + | ||
289 | + if ((in[0] & 0x20) == 0) | ||
290 | + { | ||
291 | + nchars = 2; | ||
292 | + goto valid; | ||
293 | + } | ||
294 | + | ||
295 | + if ((in[2] & 0xc0) != 0x80) | ||
296 | + goto invalid; | ||
297 | + | ||
298 | + if ((in[0] & 0x10) == 0) | ||
299 | + { | ||
300 | + nchars = 3; | ||
301 | + goto valid; | ||
302 | + } | ||
303 | + | ||
304 | + if ((in[3] & 0xc0) != 0x80) | ||
305 | + goto invalid; | ||
306 | + | ||
307 | + nchars = 4; | ||
308 | + | ||
309 | + valid: | ||
310 | + switch (unicode_display) | ||
311 | + { | ||
312 | + case unicode_locale: | ||
313 | + /* Copy the bytes into the output buffer as is. */ | ||
314 | + memcpy (out, in, nchars); | ||
315 | + out += nchars; | ||
316 | + break; | ||
317 | + | ||
318 | + case unicode_invalid: | ||
319 | + case unicode_hex: | ||
320 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '<' : '{'); | ||
321 | + out += sprintf (out, "0x"); | ||
322 | + for (j = 0; j < nchars; j++) | ||
323 | + out += sprintf (out, "%02x", in [j]); | ||
324 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '>' : '}'); | ||
325 | + break; | ||
326 | + | ||
327 | + case unicode_highlight: | ||
328 | + if (isatty (1)) | ||
329 | + out += sprintf (out, "\x1B[31;47m"); /* Red. */ | ||
330 | + /* Fall through. */ | ||
331 | + case unicode_escape: | ||
332 | + switch (nchars) | ||
333 | + { | ||
334 | + case 2: | ||
335 | + out += sprintf (out, "\\u%02x%02x", | ||
336 | + ((in[0] & 0x1c) >> 2), | ||
337 | + ((in[0] & 0x03) << 6) | (in[1] & 0x3f)); | ||
338 | + break; | ||
339 | + | ||
340 | + case 3: | ||
341 | + out += sprintf (out, "\\u%02x%02x", | ||
342 | + ((in[0] & 0x0f) << 4) | ((in[1] & 0x3c) >> 2), | ||
343 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3f))); | ||
344 | + break; | ||
345 | + | ||
346 | + case 4: | ||
347 | + out += sprintf (out, "\\u%02x%02x%02x", | ||
348 | + ((in[0] & 0x07) << 6) | ((in[1] & 0x3c) >> 2), | ||
349 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3c) >> 2), | ||
350 | + ((in[2] & 0x03) << 6) | ((in[3] & 0x3f))); | ||
351 | + break; | ||
352 | + default: | ||
353 | + /* URG. */ | ||
354 | + break; | ||
355 | + } | ||
356 | + | ||
357 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
358 | + out += sprintf (out, "\033[0m"); /* Default colour. */ | ||
359 | + break; | ||
360 | + | ||
361 | + default: | ||
362 | + /* URG */ | ||
363 | + break; | ||
364 | + } | ||
365 | + | ||
366 | + * consumed = nchars; | ||
367 | + return out - orig_out; | ||
368 | + | ||
369 | + invalid: | ||
370 | + /* Not a valid UTF-8 sequence. */ | ||
371 | + *out = *in; | ||
372 | + * consumed = 1; | ||
373 | + return 1; | ||
374 | +} | ||
375 | + | ||
376 | +/* Convert any UTF-8 encoded characters in NAME into the form specified by | ||
377 | + unicode_display. Also converts control characters. Returns a static | ||
378 | + buffer if conversion was necessary. | ||
379 | + Code stolen from objdump.c:sanitize_string(). */ | ||
380 | + | ||
381 | +static const char * | ||
382 | +convert_utf8 (const char * in) | ||
383 | +{ | ||
384 | + static char * buffer = NULL; | ||
385 | + static size_t buffer_len = 0; | ||
386 | + const char * original = in; | ||
387 | + char * out; | ||
388 | + | ||
389 | + /* Paranoia. */ | ||
390 | + if (in == NULL) | ||
391 | + return ""; | ||
392 | + | ||
393 | + /* See if any conversion is necessary. | ||
394 | + In the majority of cases it will not be needed. */ | ||
395 | + do | ||
396 | + { | ||
397 | + unsigned char c = *in++; | ||
398 | + | ||
399 | + if (c == 0) | ||
400 | + return original; | ||
401 | + | ||
402 | + if (ISCNTRL (c)) | ||
403 | + break; | ||
404 | + | ||
405 | + if (unicode_display != unicode_default && c >= 0xc0) | ||
406 | + break; | ||
407 | + } | ||
408 | + while (1); | ||
409 | + | ||
410 | + /* Copy the input, translating as needed. */ | ||
411 | + in = original; | ||
412 | + if (buffer_len < (strlen (in) * 9)) | ||
413 | + { | ||
414 | + free ((void *) buffer); | ||
415 | + buffer_len = strlen (in) * 9; | ||
416 | + buffer = xmalloc (buffer_len + 1); | ||
417 | + } | ||
418 | + | ||
419 | + out = buffer; | ||
420 | + do | ||
421 | + { | ||
422 | + unsigned char c = *in++; | ||
423 | + | ||
424 | + if (c == 0) | ||
425 | + break; | ||
426 | + | ||
427 | + if (ISCNTRL (c)) | ||
428 | + { | ||
429 | + *out++ = '^'; | ||
430 | + *out++ = c + 0x40; | ||
431 | + } | ||
432 | + else if (unicode_display != unicode_default && c >= 0xc0) | ||
433 | + { | ||
434 | + unsigned int num_consumed; | ||
435 | + | ||
436 | + out += display_utf8 ((const unsigned char *)(in - 1), out, & num_consumed); | ||
437 | + in += num_consumed - 1; | ||
438 | + } | ||
439 | + else | ||
440 | + *out++ = c; | ||
441 | + } | ||
442 | + while (1); | ||
443 | + | ||
444 | + *out = 0; | ||
445 | + return buffer; | ||
446 | +} | ||
447 | + | ||
448 | |||
449 | /* Print symbol name NAME, read from ABFD, with printf format FORM, | ||
450 | demangling it if requested. */ | ||
451 | @@ -418,6 +620,11 @@ print_symname (const char *form, struct | ||
452 | name = alloc; | ||
453 | } | ||
454 | |||
455 | + if (unicode_display != unicode_default) | ||
456 | + { | ||
457 | + name = convert_utf8 (name); | ||
458 | + } | ||
459 | + | ||
460 | if (info != NULL && info->elfinfo) | ||
461 | { | ||
462 | const char *version_string; | ||
463 | @@ -1738,7 +1945,7 @@ main (int argc, char **argv) | ||
464 | fatal (_("fatal error: libbfd ABI mismatch")); | ||
465 | set_default_bfd_target (); | ||
466 | |||
467 | - while ((c = getopt_long (argc, argv, "aABCDef:gHhlnopPrSst:uvVvX:", | ||
468 | + while ((c = getopt_long (argc, argv, "aABCDef:gHhlnopPrSst:uU:vVvX:", | ||
469 | long_options, (int *) 0)) != EOF) | ||
470 | { | ||
471 | switch (c) | ||
472 | @@ -1828,6 +2035,24 @@ main (int argc, char **argv) | ||
473 | case 'u': | ||
474 | undefined_only = 1; | ||
475 | break; | ||
476 | + | ||
477 | + case 'U': | ||
478 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
479 | + unicode_display = unicode_default; | ||
480 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
481 | + unicode_display = unicode_locale; | ||
482 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
483 | + unicode_display = unicode_escape; | ||
484 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
485 | + unicode_display = unicode_invalid; | ||
486 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
487 | + unicode_display = unicode_hex; | ||
488 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
489 | + unicode_display = unicode_highlight; | ||
490 | + else | ||
491 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
492 | + break; | ||
493 | + | ||
494 | case 'V': | ||
495 | show_version = 1; | ||
496 | break; | ||
497 | diff --git a/binutils/objdump.c b/binutils/objdump.c | ||
498 | --- a/binutils/objdump.c 2021-12-23 03:23:38.445843329 -0800 | ||
499 | +++ b/binutils/objdump.c 2021-12-23 03:30:31.098968875 -0800 | ||
500 | @@ -205,6 +205,19 @@ static const struct objdump_private_desc | ||
501 | |||
502 | /* The list of detected jumps inside a function. */ | ||
503 | static struct jump_info *detected_jumps = NULL; | ||
504 | + | ||
505 | +typedef enum unicode_display_type /* How display_utf8 renders a UTF-8 multibyte sequence. */ | ||
506 | +{ | ||
507 | + unicode_default = 0, /* No conversion: display_utf8 copies the byte through unchanged. */ | ||
508 | + unicode_locale, /* Copy the bytes of the sequence to the output as is. */ | ||
509 | + unicode_escape, /* Print the sequence as a \u escape. */ | ||
510 | + unicode_hex, /* Print the bytes in hex between '<' and '>'. */ | ||
511 | + unicode_highlight, /* As unicode_escape, wrapped in red colour codes when isatty (1). */ | ||
512 | + unicode_invalid /* Print the bytes in hex between '{' and '}'. */ | ||
513 | +} unicode_display_type; | ||
514 | + | ||
515 | +static unicode_display_type unicode_display = unicode_default; /* NOTE(review): presumably set by the -U/--unicode handler, which is not visible here - confirm. */ | ||
516 | + | ||
517 | |||
518 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; | ||
519 | static void | ||
520 | @@ -274,6 +287,8 @@ usage (FILE *stream, int status) | ||
521 | --recurse-limit Enable a limit on recursion whilst demangling. [Default]\n\ | ||
522 | --no-recurse-limit Disable a limit on recursion whilst demangling\n\ | ||
523 | -w, --wide Format output for more than 80 columns\n\ | ||
524 | + -U[d|l|i|x|e|h] Controls the display of UTF-8 unicode characters\n\ | ||
525 | + --unicode=[default|locale|invalid|hex|escape|highlight]\n\ | ||
526 | -z, --disassemble-zeroes Do not skip blocks of zeroes when disassembling\n\ | ||
527 | --start-address=ADDR Only process data whose address is >= ADDR\n\ | ||
528 | --stop-address=ADDR Only process data whose address is < ADDR\n\ | ||
529 | @@ -348,17 +363,23 @@ static struct option long_options[]= | ||
530 | { | ||
531 | {"adjust-vma", required_argument, NULL, OPTION_ADJUST_VMA}, | ||
532 | {"all-headers", no_argument, NULL, 'x'}, | ||
533 | - {"private-headers", no_argument, NULL, 'p'}, | ||
534 | - {"private", required_argument, NULL, 'P'}, | ||
535 | {"architecture", required_argument, NULL, 'm'}, | ||
536 | {"archive-headers", no_argument, NULL, 'a'}, | ||
537 | +#ifdef ENABLE_LIBCTF | ||
538 | + {"ctf", required_argument, NULL, OPTION_CTF}, | ||
539 | + {"ctf-parent", required_argument, NULL, OPTION_CTF_PARENT}, | ||
540 | +#endif | ||
541 | {"debugging", no_argument, NULL, 'g'}, | ||
542 | {"debugging-tags", no_argument, NULL, 'e'}, | ||
543 | {"demangle", optional_argument, NULL, 'C'}, | ||
544 | {"disassemble", optional_argument, NULL, 'd'}, | ||
545 | {"disassemble-all", no_argument, NULL, 'D'}, | ||
546 | - {"disassembler-options", required_argument, NULL, 'M'}, | ||
547 | {"disassemble-zeroes", no_argument, NULL, 'z'}, | ||
548 | + {"disassembler-options", required_argument, NULL, 'M'}, | ||
549 | + {"dwarf", optional_argument, NULL, OPTION_DWARF}, | ||
550 | + {"dwarf-check", no_argument, 0, OPTION_DWARF_CHECK}, | ||
551 | + {"dwarf-depth", required_argument, 0, OPTION_DWARF_DEPTH}, | ||
552 | + {"dwarf-start", required_argument, 0, OPTION_DWARF_START}, | ||
553 | {"dynamic-reloc", no_argument, NULL, 'R'}, | ||
554 | {"dynamic-syms", no_argument, NULL, 'T'}, | ||
555 | {"endian", required_argument, NULL, OPTION_ENDIAN}, | ||
556 | @@ -368,15 +389,22 @@ static struct option long_options[]= | ||
557 | {"full-contents", no_argument, NULL, 's'}, | ||
558 | {"headers", no_argument, NULL, 'h'}, | ||
559 | {"help", no_argument, NULL, 'H'}, | ||
560 | + {"include", required_argument, NULL, 'I'}, | ||
561 | {"info", no_argument, NULL, 'i'}, | ||
562 | + {"inlines", no_argument, 0, OPTION_INLINES}, | ||
563 | + {"insn-width", required_argument, NULL, OPTION_INSN_WIDTH}, | ||
564 | {"line-numbers", no_argument, NULL, 'l'}, | ||
565 | - {"no-show-raw-insn", no_argument, &show_raw_insn, -1}, | ||
566 | {"no-addresses", no_argument, &no_addresses, 1}, | ||
567 | + {"no-recurse-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
568 | + {"no-recursion-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
569 | + {"no-show-raw-insn", no_argument, &show_raw_insn, -1}, | ||
570 | + {"prefix", required_argument, NULL, OPTION_PREFIX}, | ||
571 | {"prefix-addresses", no_argument, &prefix_addresses, 1}, | ||
572 | + {"prefix-strip", required_argument, NULL, OPTION_PREFIX_STRIP}, | ||
573 | + {"private", required_argument, NULL, 'P'}, | ||
574 | + {"private-headers", no_argument, NULL, 'p'}, | ||
575 | {"recurse-limit", no_argument, NULL, OPTION_RECURSE_LIMIT}, | ||
576 | {"recursion-limit", no_argument, NULL, OPTION_RECURSE_LIMIT}, | ||
577 | - {"no-recurse-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
578 | - {"no-recursion-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
579 | {"reloc", no_argument, NULL, 'r'}, | ||
580 | {"section", required_argument, NULL, 'j'}, | ||
581 | {"section-headers", no_argument, NULL, 'h'}, | ||
582 | @@ -384,28 +412,16 @@ static struct option long_options[]= | ||
583 | {"source", no_argument, NULL, 'S'}, | ||
584 | {"source-comment", optional_argument, NULL, OPTION_SOURCE_COMMENT}, | ||
585 | {"special-syms", no_argument, &dump_special_syms, 1}, | ||
586 | - {"include", required_argument, NULL, 'I'}, | ||
587 | - {"dwarf", optional_argument, NULL, OPTION_DWARF}, | ||
588 | -#ifdef ENABLE_LIBCTF | ||
589 | - {"ctf", required_argument, NULL, OPTION_CTF}, | ||
590 | - {"ctf-parent", required_argument, NULL, OPTION_CTF_PARENT}, | ||
591 | -#endif | ||
592 | {"stabs", no_argument, NULL, 'G'}, | ||
593 | {"start-address", required_argument, NULL, OPTION_START_ADDRESS}, | ||
594 | {"stop-address", required_argument, NULL, OPTION_STOP_ADDRESS}, | ||
595 | {"syms", no_argument, NULL, 't'}, | ||
596 | {"target", required_argument, NULL, 'b'}, | ||
597 | + {"unicode", required_argument, NULL, 'U'}, | ||
598 | {"version", no_argument, NULL, 'V'}, | ||
599 | - {"wide", no_argument, NULL, 'w'}, | ||
600 | - {"prefix", required_argument, NULL, OPTION_PREFIX}, | ||
601 | - {"prefix-strip", required_argument, NULL, OPTION_PREFIX_STRIP}, | ||
602 | - {"insn-width", required_argument, NULL, OPTION_INSN_WIDTH}, | ||
603 | - {"dwarf-depth", required_argument, 0, OPTION_DWARF_DEPTH}, | ||
604 | - {"dwarf-start", required_argument, 0, OPTION_DWARF_START}, | ||
605 | - {"dwarf-check", no_argument, 0, OPTION_DWARF_CHECK}, | ||
606 | - {"inlines", no_argument, 0, OPTION_INLINES}, | ||
607 | {"visualize-jumps", optional_argument, 0, OPTION_VISUALIZE_JUMPS}, | ||
608 | - {0, no_argument, 0, 0} | ||
609 | + {"wide", no_argument, NULL, 'w'}, | ||
610 | + {NULL, no_argument, NULL, 0} | ||
611 | }; | ||
612 | |||
613 | static void | ||
614 | @@ -415,9 +431,121 @@ nonfatal (const char *msg) | ||
615 | exit_status = 1; | ||
616 | } | ||
617 | |||
618 | +/* Convert a potential UTF-8 encoded sequence in IN into characters in OUT. | ||
619 | + The conversion format is controlled by the unicode_display variable. | ||
620 | + Returns the number of characters added to OUT. | ||
621 | + Returns the number of bytes consumed from IN in CONSUMED. | ||
622 | + Always consumes at least one byte and displays at least one character. */ | ||
623 | + | ||
624 | +static unsigned int | ||
625 | +display_utf8 (const unsigned char * in, char * out, unsigned int * consumed) | ||
626 | +{ | ||
627 | + char * orig_out = out; | ||
628 | + unsigned int nchars = 0; | ||
629 | + unsigned int j; | ||
630 | + | ||
631 | + if (unicode_display == unicode_default) | ||
632 | + goto invalid; | ||
633 | + | ||
634 | + if (in[0] < 0xc0) | ||
635 | + goto invalid; | ||
636 | + | ||
637 | + if ((in[1] & 0xc0) != 0x80) | ||
638 | + goto invalid; | ||
639 | + | ||
640 | + if ((in[0] & 0x20) == 0) | ||
641 | + { | ||
642 | + nchars = 2; | ||
643 | + goto valid; | ||
644 | + } | ||
645 | + | ||
646 | + if ((in[2] & 0xc0) != 0x80) | ||
647 | + goto invalid; | ||
648 | + | ||
649 | + if ((in[0] & 0x10) == 0) | ||
650 | + { | ||
651 | + nchars = 3; | ||
652 | + goto valid; | ||
653 | + } | ||
654 | + | ||
655 | + if ((in[3] & 0xc0) != 0x80) | ||
656 | + goto invalid; | ||
657 | + | ||
658 | + nchars = 4; | ||
659 | + | ||
660 | + valid: | ||
661 | + switch (unicode_display) | ||
662 | + { | ||
663 | + case unicode_locale: | ||
664 | + /* Copy the bytes into the output buffer as is. */ | ||
665 | + memcpy (out, in, nchars); | ||
666 | + out += nchars; | ||
667 | + break; | ||
668 | + | ||
669 | + case unicode_invalid: | ||
670 | + case unicode_hex: | ||
671 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '<' : '{'); | ||
672 | + out += sprintf (out, "0x"); | ||
673 | + for (j = 0; j < nchars; j++) | ||
674 | + out += sprintf (out, "%02x", in [j]); | ||
675 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '>' : '}'); | ||
676 | + break; | ||
677 | + | ||
678 | + case unicode_highlight: | ||
679 | + if (isatty (1)) | ||
680 | + out += sprintf (out, "\x1B[31;47m"); /* Red. */ | ||
681 | + /* Fall through. */ | ||
682 | + case unicode_escape: | ||
683 | + switch (nchars) | ||
684 | + { | ||
685 | + case 2: | ||
686 | + out += sprintf (out, "\\u%02x%02x", | ||
687 | + ((in[0] & 0x1c) >> 2), | ||
688 | + ((in[0] & 0x03) << 6) | (in[1] & 0x3f)); | ||
689 | + break; | ||
690 | + | ||
691 | + case 3: | ||
692 | + out += sprintf (out, "\\u%02x%02x", | ||
693 | + ((in[0] & 0x0f) << 4) | ((in[1] & 0x3c) >> 2), | ||
694 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3f))); | ||
695 | + break; | ||
696 | + | ||
697 | + case 4: | ||
698 | + out += sprintf (out, "\\u%02x%02x%02x", | ||
699 | + ((in[0] & 0x07) << 6) | ((in[1] & 0x3c) >> 2), | ||
700 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3c) >> 2), | ||
701 | + ((in[2] & 0x03) << 6) | ((in[3] & 0x3f))); | ||
702 | + break; | ||
703 | + default: | ||
704 | + /* Unreachable: nchars is always 2, 3 or 4 here. */ | ||
705 | + break; | ||
706 | + } | ||
707 | + | ||
708 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
709 | + out += sprintf (out, "\033[0m"); /* Default colour. */ | ||
710 | + break; | ||
711 | + | ||
712 | + default: | ||
713 | + /* Unreachable: unicode_default was diverted to 'invalid' above. */ | ||
714 | + break; | ||
715 | + } | ||
716 | + | ||
717 | + * consumed = nchars; | ||
718 | + return out - orig_out; | ||
719 | + | ||
720 | + invalid: | ||
721 | + /* Not a valid UTF-8 sequence. */ | ||
722 | + *out = *in; | ||
723 | + * consumed = 1; | ||
724 | + return 1; | ||
725 | +} | ||
726 | + | ||
727 | /* Returns a version of IN with any control characters | ||
728 | replaced by escape sequences. Uses a static buffer | ||
729 | - if necessary. */ | ||
730 | + if necessary. | ||
731 | + | ||
732 | + If unicode display is enabled, then also handles the | ||
733 | + conversion of unicode characters. */ | ||
734 | |||
735 | static const char * | ||
736 | sanitize_string (const char * in) | ||
737 | @@ -435,40 +563,50 @@ sanitize_string (const char * in) | ||
738 | of cases it will not be needed. */ | ||
739 | do | ||
740 | { | ||
741 | - char c = *in++; | ||
742 | + unsigned char c = *in++; | ||
743 | |||
744 | if (c == 0) | ||
745 | return original; | ||
746 | |||
747 | if (ISCNTRL (c)) | ||
748 | break; | ||
749 | + | ||
750 | + if (unicode_display != unicode_default && c >= 0xc0) | ||
751 | + break; | ||
752 | } | ||
753 | while (1); | ||
754 | |||
755 | /* Copy the input, translating as needed. */ | ||
756 | in = original; | ||
757 | - if (buffer_len < (strlen (in) * 2)) | ||
758 | + if (buffer_len < (strlen (in) * 9)) | ||
759 | { | ||
760 | free ((void *) buffer); | ||
761 | - buffer_len = strlen (in) * 2; | ||
762 | + buffer_len = strlen (in) * 9; | ||
763 | buffer = xmalloc (buffer_len + 1); | ||
764 | } | ||
765 | |||
766 | out = buffer; | ||
767 | do | ||
768 | { | ||
769 | - char c = *in++; | ||
770 | + unsigned char c = *in++; | ||
771 | |||
772 | if (c == 0) | ||
773 | break; | ||
774 | |||
775 | - if (!ISCNTRL (c)) | ||
776 | - *out++ = c; | ||
777 | - else | ||
778 | + if (ISCNTRL (c)) | ||
779 | { | ||
780 | *out++ = '^'; | ||
781 | *out++ = c + 0x40; | ||
782 | } | ||
783 | + else if (unicode_display != unicode_default && c >= 0xc0) | ||
784 | + { | ||
785 | + unsigned int num_consumed; | ||
786 | + | ||
787 | + out += display_utf8 ((const unsigned char *)(in - 1), out, & num_consumed); | ||
788 | + in += num_consumed - 1; | ||
789 | + } | ||
790 | + else | ||
791 | + *out++ = c; | ||
792 | } | ||
793 | while (1); | ||
794 | |||
795 | @@ -4481,6 +4619,24 @@ dump_symbols (bfd *abfd ATTRIBUTE_UNUSED | ||
796 | free (alloc); | ||
797 | } | ||
798 | } | ||
799 | + else if (unicode_display != unicode_default | ||
800 | + && name != NULL && *name != '\0') | ||
801 | + { | ||
802 | + const char * sanitized_name; | ||
803 | + | ||
804 | + /* If we want to sanitize the name, we do it here, and | ||
805 | + temporarily clobber it while calling bfd_print_symbol. | ||
806 | + FIXME: This is a gross hack. */ | ||
807 | + sanitized_name = sanitize_string (name); | ||
808 | + if (sanitized_name != name) | ||
809 | + (*current)->name = sanitized_name; | ||
810 | + else | ||
811 | + sanitized_name = NULL; | ||
812 | + bfd_print_symbol (cur_bfd, stdout, *current, | ||
813 | + bfd_print_symbol_all); | ||
814 | + if (sanitized_name != NULL) | ||
815 | + (*current)->name = name; | ||
816 | + } | ||
817 | else | ||
818 | bfd_print_symbol (cur_bfd, stdout, *current, | ||
819 | bfd_print_symbol_all); | ||
820 | @@ -5162,7 +5318,7 @@ main (int argc, char **argv) | ||
821 | set_default_bfd_target (); | ||
822 | |||
823 | while ((c = getopt_long (argc, argv, | ||
824 | - "pP:ib:m:M:VvCdDlfFaHhrRtTxsSI:j:wE:zgeGW::", | ||
825 | + "pP:ib:m:U:M:VvCdDlfFaHhrRtTxsSI:j:wE:zgeGW::", | ||
826 | long_options, (int *) 0)) | ||
827 | != EOF) | ||
828 | { | ||
829 | @@ -5441,6 +5597,23 @@ main (int argc, char **argv) | ||
830 | seenflag = TRUE; | ||
831 | break; | ||
832 | |||
833 | + case 'U': | ||
834 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
835 | + unicode_display = unicode_default; | ||
836 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
837 | + unicode_display = unicode_locale; | ||
838 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
839 | + unicode_display = unicode_escape; | ||
840 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
841 | + unicode_display = unicode_invalid; | ||
842 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
843 | + unicode_display = unicode_hex; | ||
844 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
845 | + unicode_display = unicode_highlight; | ||
846 | + else | ||
847 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
848 | + break; | ||
849 | + | ||
850 | case 'H': | ||
851 | usage (stdout, 0); | ||
852 | /* No need to set seenflag or to break - usage() does not return. */ | ||
853 | diff --git a/binutils/readelf.c b/binutils/readelf.c | ||
854 | --- a/binutils/readelf.c 2021-12-23 03:23:42.065783023 -0800 | ||
855 | +++ b/binutils/readelf.c 2021-12-23 03:30:36.582877519 -0800 | ||
856 | @@ -321,6 +321,19 @@ typedef enum print_mode | ||
857 | } | ||
858 | print_mode; | ||
859 | |||
860 | +typedef enum unicode_display_type | ||
861 | +{ | ||
862 | + unicode_default = 0, | ||
863 | + unicode_locale, | ||
864 | + unicode_escape, | ||
865 | + unicode_hex, | ||
866 | + unicode_highlight, | ||
867 | + unicode_invalid | ||
868 | +} unicode_display_type; | ||
869 | + | ||
870 | +static unicode_display_type unicode_display = unicode_default; | ||
871 | + | ||
872 | + | ||
873 | /* Versioned symbol info. */ | ||
874 | enum versioned_symbol_info | ||
875 | { | ||
876 | @@ -613,11 +626,18 @@ print_symbol (signed int width, const ch | ||
877 | if (c == 0) | ||
878 | break; | ||
879 | |||
880 | - /* Do not print control characters directly as they can affect terminal | ||
881 | - settings. Such characters usually appear in the names generated | ||
882 | - by the assembler for local labels. */ | ||
883 | - if (ISCNTRL (c)) | ||
884 | + if (ISPRINT (c)) | ||
885 | + { | ||
886 | + putchar (c); | ||
887 | + width_remaining --; | ||
888 | + num_printed ++; | ||
889 | + } | ||
890 | + else if (ISCNTRL (c)) | ||
891 | { | ||
892 | + /* Do not print control characters directly as they can affect terminal | ||
893 | + settings. Such characters usually appear in the names generated | ||
894 | + by the assembler for local labels. */ | ||
895 | + | ||
896 | if (width_remaining < 2) | ||
897 | break; | ||
898 | |||
899 | @@ -625,11 +645,137 @@ print_symbol (signed int width, const ch | ||
900 | width_remaining -= 2; | ||
901 | num_printed += 2; | ||
902 | } | ||
903 | - else if (ISPRINT (c)) | ||
904 | + else if (c == 0x7f) | ||
905 | { | ||
906 | - putchar (c); | ||
907 | - width_remaining --; | ||
908 | - num_printed ++; | ||
909 | + if (width_remaining < 5) | ||
910 | + break; | ||
911 | + printf ("<DEL>"); | ||
912 | + width_remaining -= 5; | ||
913 | + num_printed += 5; | ||
914 | + } | ||
915 | + else if (unicode_display != unicode_locale | ||
916 | + && unicode_display != unicode_default) | ||
917 | + { | ||
918 | + /* Display unicode characters as something else. */ | ||
919 | + unsigned char bytes[4]; | ||
920 | + bfd_boolean is_utf8; | ||
921 | + unsigned int nbytes; | ||
922 | + | ||
923 | + bytes[0] = c; | ||
924 | + | ||
925 | + if (bytes[0] < 0xc0) | ||
926 | + { | ||
927 | + nbytes = 1; | ||
928 | + is_utf8 = FALSE; | ||
929 | + } | ||
930 | + else | ||
931 | + { | ||
932 | + bytes[1] = *symbol++; | ||
933 | + | ||
934 | + if ((bytes[1] & 0xc0) != 0x80) | ||
935 | + { | ||
936 | + is_utf8 = FALSE; | ||
937 | + /* Do not consume this character. It may only | ||
938 | + be the first byte in the sequence that was | ||
939 | + corrupt. */ | ||
940 | + --symbol; | ||
941 | + nbytes = 1; | ||
942 | + } | ||
943 | + else if ((bytes[0] & 0x20) == 0) | ||
944 | + { | ||
945 | + is_utf8 = TRUE; | ||
946 | + nbytes = 2; | ||
947 | + } | ||
948 | + else | ||
949 | + { | ||
950 | + bytes[2] = *symbol++; | ||
951 | + | ||
952 | + if ((bytes[2] & 0xc0) != 0x80) | ||
953 | + { | ||
954 | + is_utf8 = FALSE; | ||
955 | + symbol -= 2; | ||
956 | + nbytes = 1; | ||
957 | + } | ||
958 | + else if ((bytes[0] & 0x10) == 0) | ||
959 | + { | ||
960 | + is_utf8 = TRUE; | ||
961 | + nbytes = 3; | ||
962 | + } | ||
963 | + else | ||
964 | + { | ||
965 | + bytes[3] = *symbol++; | ||
966 | + | ||
967 | + nbytes = 4; | ||
968 | + | ||
969 | + if ((bytes[3] & 0xc0) != 0x80) | ||
970 | + { | ||
971 | + is_utf8 = FALSE; | ||
972 | + symbol -= 3; | ||
973 | + nbytes = 1; | ||
974 | + } | ||
975 | + else | ||
976 | + is_utf8 = TRUE; | ||
977 | + } | ||
978 | + } | ||
979 | + } | ||
980 | + | ||
981 | + if (unicode_display == unicode_invalid) | ||
982 | + is_utf8 = FALSE; | ||
983 | + | ||
984 | + if (unicode_display == unicode_hex || ! is_utf8) | ||
985 | + { | ||
986 | + unsigned int i; | ||
987 | + | ||
988 | + if (width_remaining < (nbytes * 2) + 2) | ||
989 | + break; | ||
990 | + | ||
991 | + putchar (is_utf8 ? '<' : '{'); | ||
992 | + printf ("0x"); | ||
993 | + for (i = 0; i < nbytes; i++) | ||
994 | + printf ("%02x", bytes[i]); | ||
995 | + putchar (is_utf8 ? '>' : '}'); | ||
996 | + } | ||
997 | + else | ||
998 | + { | ||
999 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1000 | + printf ("\x1B[31;47m"); /* Red. */ | ||
1001 | + | ||
1002 | + switch (nbytes) | ||
1003 | + { | ||
1004 | + case 2: | ||
1005 | + if (width_remaining < 6) | ||
1006 | + break; | ||
1007 | + printf ("\\u%02x%02x", | ||
1008 | + (bytes[0] & 0x1c) >> 2, | ||
1009 | + ((bytes[0] & 0x03) << 6) | (bytes[1] & 0x3f)); | ||
1010 | + break; | ||
1011 | + case 3: | ||
1012 | + if (width_remaining < 6) | ||
1013 | + break; | ||
1014 | + printf ("\\u%02x%02x", | ||
1015 | + ((bytes[0] & 0x0f) << 4) | ((bytes[1] & 0x3c) >> 2), | ||
1016 | + ((bytes[1] & 0x03) << 6) | (bytes[2] & 0x3f)); | ||
1017 | + break; | ||
1018 | + case 4: | ||
1019 | + if (width_remaining < 8) | ||
1020 | + break; | ||
1021 | + printf ("\\u%02x%02x%02x", | ||
1022 | + ((bytes[0] & 0x07) << 6) | ((bytes[1] & 0x3c) >> 2), | ||
1023 | + ((bytes[1] & 0x03) << 6) | ((bytes[2] & 0x3c) >> 2), | ||
1024 | + ((bytes[2] & 0x03) << 6) | (bytes[3] & 0x3f)); | ||
1025 | + | ||
1026 | + break; | ||
1027 | + default: | ||
1028 | + /* Unreachable: a valid sequence has 2, 3 or 4 bytes. */ | ||
1029 | + break; | ||
1030 | + } | ||
1031 | + | ||
1032 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1033 | + printf ("\033[0m"); /* Default colour. */ | ||
1034 | + } | ||
1035 | + | ||
1036 | + if (bytes[nbytes - 1] == 0) | ||
1037 | + break; | ||
1038 | } | ||
1039 | else | ||
1040 | { | ||
1041 | @@ -4556,6 +4702,7 @@ static struct option options[] = | ||
1042 | {"syms", no_argument, 0, 's'}, | ||
1043 | {"silent-truncation",no_argument, 0, 'T'}, | ||
1044 | {"section-details", no_argument, 0, 't'}, | ||
1045 | + {"unicode", required_argument, NULL, 'U'}, | ||
1046 | {"unwind", no_argument, 0, 'u'}, | ||
1047 | {"version-info", no_argument, 0, 'V'}, | ||
1048 | {"version", no_argument, 0, 'v'}, | ||
1049 | @@ -4609,6 +4756,11 @@ usage (FILE * stream) | ||
1050 | --no-demangle Do not demangle low-level symbol names. (This is the default)\n\ | ||
1051 | --recurse-limit Enable a demangling recursion limit. (This is the default)\n\ | ||
1052 | --no-recurse-limit Disable a demangling recursion limit\n\ | ||
1053 | + -U[dlexhi] --unicode=[default|locale|escape|hex|highlight|invalid]\n\ | ||
1054 | + Display unicode characters as determined by the current locale\n\ | ||
1055 | + (default), escape sequences, \"<hex sequences>\", highlighted\n\ | ||
1056 | + escape sequences, or treat them as invalid and display as\n\ | ||
1057 | + \"{hex sequences}\"\n\ | ||
1058 | -n --notes Display the core notes (if present)\n\ | ||
1059 | -r --relocs Display the relocations (if present)\n\ | ||
1060 | -u --unwind Display the unwind info (if present)\n\ | ||
1061 | @@ -4749,7 +4901,7 @@ parse_args (struct dump_data *dumpdata, | ||
1062 | usage (stderr); | ||
1063 | |||
1064 | while ((c = getopt_long | ||
1065 | - (argc, argv, "ACDHILNR:STVWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF) | ||
1066 | + (argc, argv, "ACDHILNR:STU:VWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF) | ||
1067 | { | ||
1068 | switch (c) | ||
1069 | { | ||
1070 | @@ -4943,6 +5095,24 @@ parse_args (struct dump_data *dumpdata, | ||
1071 | case OPTION_WITH_SYMBOL_VERSIONS: | ||
1072 | /* Ignored for backward compatibility. */ | ||
1073 | break; | ||
1074 | + case 'U': | ||
1075 | + if (optarg == NULL) | ||
1076 | + error (_("Missing arg to -U/--unicode")); /* Can this happen ? */ | ||
1077 | + else if (streq (optarg, "default") || streq (optarg, "d")) | ||
1078 | + unicode_display = unicode_default; | ||
1079 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
1080 | + unicode_display = unicode_locale; | ||
1081 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
1082 | + unicode_display = unicode_escape; | ||
1083 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
1084 | + unicode_display = unicode_invalid; | ||
1085 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
1086 | + unicode_display = unicode_hex; | ||
1087 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
1088 | + unicode_display = unicode_highlight; | ||
1089 | + else | ||
1090 | + error (_("invalid argument to -U/--unicode: %s"), optarg); | ||
1091 | + break; | ||
1092 | |||
1093 | default: | ||
1094 | /* xgettext:c-format */ | ||
1095 | diff --git a/binutils/strings.c b/binutils/strings.c | ||
1096 | --- a/binutils/strings.c 2021-12-23 03:23:38.485842662 -0800 | ||
1097 | +++ b/binutils/strings.c 2021-12-23 03:30:36.586877452 -0800 | ||
1098 | @@ -55,6 +55,19 @@ | ||
1099 | -T {bfdname} | ||
1100 | Specify a non-default object file format. | ||
1101 | |||
1102 | + --unicode={default|locale|invalid|hex|escape|highlight} | ||
1103 | + -U {d|l|i|x|e|h} | ||
1104 | + Determine how to handle UTF-8 unicode characters. The default | ||
1105 | + is no special treatment. All other versions of this option | ||
1106 | + only apply if the encoding is valid and enabling the option | ||
1107 | + implies --encoding=S. | ||
1108 | + The 'locale' option displays the characters according to the | ||
1109 | + current locale. The 'invalid' option treats them as | ||
1110 | + non-string characters. The 'hex' option displays them as hex | ||
1111 | + byte sequences. The 'escape' option displays them as escape | ||
1112 | + sequences and the 'highlight' option displays them as | ||
1113 | + coloured escape sequences. | ||
1114 | + | ||
1115 | --output-separator=sep_string | ||
1116 | -s sep_string String used to separate parsed strings in output. | ||
1117 | Default is newline. | ||
1118 | @@ -76,6 +89,22 @@ | ||
1119 | #include "safe-ctype.h" | ||
1120 | #include "bucomm.h" | ||
1121 | |||
1122 | +#ifndef streq | ||
1123 | +#define streq(a,b) (strcmp ((a),(b)) == 0) | ||
1124 | +#endif | ||
1125 | + | ||
1126 | +typedef enum unicode_display_type | ||
1127 | +{ | ||
1128 | + unicode_default = 0, | ||
1129 | + unicode_locale, | ||
1130 | + unicode_escape, | ||
1131 | + unicode_hex, | ||
1132 | + unicode_highlight, | ||
1133 | + unicode_invalid | ||
1134 | +} unicode_display_type; | ||
1135 | + | ||
1136 | +static unicode_display_type unicode_display = unicode_default; | ||
1137 | + | ||
1138 | #define STRING_ISGRAPHIC(c) \ | ||
1139 | ( (c) >= 0 \ | ||
1140 | && (c) <= 255 \ | ||
1141 | @@ -94,7 +123,7 @@ extern int errno; | ||
1142 | static int address_radix; | ||
1143 | |||
1144 | /* Minimum length of sequence of graphic chars to trigger output. */ | ||
1145 | -static int string_min; | ||
1146 | +static unsigned int string_min; | ||
1147 | |||
1148 | /* Whether or not we include all whitespace as a graphic char. */ | ||
1149 | static bfd_boolean include_all_whitespace; | ||
1150 | @@ -121,21 +150,22 @@ static char *output_separator; | ||
1151 | static struct option long_options[] = | ||
1152 | { | ||
1153 | {"all", no_argument, NULL, 'a'}, | ||
1154 | + {"bytes", required_argument, NULL, 'n'}, | ||
1155 | {"data", no_argument, NULL, 'd'}, | ||
1156 | + {"encoding", required_argument, NULL, 'e'}, | ||
1157 | + {"help", no_argument, NULL, 'h'}, | ||
1158 | + {"include-all-whitespace", no_argument, NULL, 'w'}, | ||
1159 | + {"output-separator", required_argument, NULL, 's'}, | ||
1160 | {"print-file-name", no_argument, NULL, 'f'}, | ||
1161 | - {"bytes", required_argument, NULL, 'n'}, | ||
1162 | {"radix", required_argument, NULL, 't'}, | ||
1163 | - {"include-all-whitespace", no_argument, NULL, 'w'}, | ||
1164 | - {"encoding", required_argument, NULL, 'e'}, | ||
1165 | {"target", required_argument, NULL, 'T'}, | ||
1166 | - {"output-separator", required_argument, NULL, 's'}, | ||
1167 | - {"help", no_argument, NULL, 'h'}, | ||
1168 | + {"unicode", required_argument, NULL, 'U'}, | ||
1169 | {"version", no_argument, NULL, 'v'}, | ||
1170 | {NULL, 0, NULL, 0} | ||
1171 | }; | ||
1172 | |||
1173 | static bfd_boolean strings_file (char *); | ||
1174 | -static void print_strings (const char *, FILE *, file_ptr, int, int, char *); | ||
1175 | +static void print_strings (const char *, FILE *, file_ptr, int, char *); | ||
1176 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; | ||
1177 | |||
1178 | int main (int, char **); | ||
1179 | @@ -173,7 +203,7 @@ main (int argc, char **argv) | ||
1180 | encoding = 's'; | ||
1181 | output_separator = NULL; | ||
1182 | |||
1183 | - while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789", | ||
1184 | + while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789", | ||
1185 | long_options, (int *) 0)) != EOF) | ||
1186 | { | ||
1187 | switch (optc) | ||
1188 | @@ -244,7 +274,24 @@ main (int argc, char **argv) | ||
1189 | |||
1190 | case 's': | ||
1191 | output_separator = optarg; | ||
1192 | - break; | ||
1193 | + break; | ||
1194 | + | ||
1195 | + case 'U': | ||
1196 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
1197 | + unicode_display = unicode_default; | ||
1198 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
1199 | + unicode_display = unicode_locale; | ||
1200 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
1201 | + unicode_display = unicode_escape; | ||
1202 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
1203 | + unicode_display = unicode_invalid; | ||
1204 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
1205 | + unicode_display = unicode_hex; | ||
1206 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
1207 | + unicode_display = unicode_highlight; | ||
1208 | + else | ||
1209 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
1210 | + break; | ||
1211 | |||
1212 | case 'V': | ||
1213 | case 'v': | ||
1214 | @@ -260,6 +307,9 @@ main (int argc, char **argv) | ||
1215 | } | ||
1216 | } | ||
1217 | |||
1218 | + if (unicode_display != unicode_default) | ||
1219 | + encoding = 'S'; | ||
1220 | + | ||
1221 | if (numeric_opt != 0) | ||
1222 | { | ||
1223 | string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0); | ||
1224 | @@ -295,14 +345,14 @@ main (int argc, char **argv) | ||
1225 | { | ||
1226 | datasection_only = FALSE; | ||
1227 | SET_BINARY (fileno (stdin)); | ||
1228 | - print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL); | ||
1229 | + print_strings ("{standard input}", stdin, 0, 0, (char *) NULL); | ||
1230 | files_given = TRUE; | ||
1231 | } | ||
1232 | else | ||
1233 | { | ||
1234 | for (; optind < argc; ++optind) | ||
1235 | { | ||
1236 | - if (strcmp (argv[optind], "-") == 0) | ||
1237 | + if (streq (argv[optind], "-")) | ||
1238 | datasection_only = FALSE; | ||
1239 | else | ||
1240 | { | ||
1241 | @@ -344,7 +394,7 @@ strings_a_section (bfd *abfd, asection * | ||
1242 | } | ||
1243 | |||
1244 | *got_a_section = TRUE; | ||
1245 | - print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem); | ||
1246 | + print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem); | ||
1247 | free (mem); | ||
1248 | } | ||
1249 | |||
1250 | @@ -429,7 +479,7 @@ strings_file (char *file) | ||
1251 | return FALSE; | ||
1252 | } | ||
1253 | |||
1254 | - print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0); | ||
1255 | + print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL); | ||
1256 | |||
1257 | if (fclose (stream) == EOF) | ||
1258 | { | ||
1259 | @@ -553,11 +603,632 @@ unget_part_char (long c, file_ptr *addre | ||
1260 | } | ||
1261 | } | ||
1262 | } | ||
1263 | + | ||
1264 | + | ||
1265 | +static void | ||
1266 | +print_filename_and_address (const char * filename, file_ptr address) | ||
1267 | +{ | ||
1268 | + if (print_filenames) | ||
1269 | + printf ("%s: ", filename); | ||
1270 | + | ||
1271 | + if (! print_addresses) | ||
1272 | + return; | ||
1273 | + | ||
1274 | + switch (address_radix) | ||
1275 | + { | ||
1276 | + case 8: | ||
1277 | + if (sizeof (address) > sizeof (long)) | ||
1278 | + { | ||
1279 | +#ifndef __MSVCRT__ | ||
1280 | + printf ("%7llo ", (unsigned long long) address); | ||
1281 | +#else | ||
1282 | + printf ("%7I64o ", (unsigned long long) address); | ||
1283 | +#endif | ||
1284 | + } | ||
1285 | + else | ||
1286 | + printf ("%7lo ", (unsigned long) address); | ||
1287 | + break; | ||
1288 | + | ||
1289 | + case 10: | ||
1290 | + if (sizeof (address) > sizeof (long)) | ||
1291 | + { | ||
1292 | +#ifndef __MSVCRT__ | ||
1293 | + printf ("%7llu ", (unsigned long long) address); | ||
1294 | +#else | ||
1295 | + printf ("%7I64d ", (unsigned long long) address); | ||
1296 | +#endif | ||
1297 | + } | ||
1298 | + else | ||
1299 | + printf ("%7ld ", (long) address); | ||
1300 | + break; | ||
1301 | + | ||
1302 | + case 16: | ||
1303 | + if (sizeof (address) > sizeof (long)) | ||
1304 | + { | ||
1305 | +#ifndef __MSVCRT__ | ||
1306 | + printf ("%7llx ", (unsigned long long) address); | ||
1307 | +#else | ||
1308 | + printf ("%7I64x ", (unsigned long long) address); | ||
1309 | +#endif | ||
1310 | + } | ||
1311 | + else | ||
1312 | + printf ("%7lx ", (unsigned long) address); | ||
1313 | + break; | ||
1314 | + } | ||
1315 | +} | ||
1316 | + | ||
1317 | +/* Return non-zero if the bytes starting at BUFFER form a valid UTF-8 encoding. | ||
1318 | + If the encoding is valid then returns the number of bytes it uses. */ | ||
1319 | + | ||
1320 | +static unsigned int | ||
1321 | +is_valid_utf8 (const unsigned char * buffer, unsigned long buflen) | ||
1322 | +{ | ||
1323 | + if (buffer[0] < 0xc0) | ||
1324 | + return 0; | ||
1325 | + | ||
1326 | + if (buflen < 2) | ||
1327 | + return 0; | ||
1328 | + | ||
1329 | + if ((buffer[1] & 0xc0) != 0x80) | ||
1330 | + return 0; | ||
1331 | + | ||
1332 | + if ((buffer[0] & 0x20) == 0) | ||
1333 | + return 2; | ||
1334 | + | ||
1335 | + if (buflen < 3) | ||
1336 | + return 0; | ||
1337 | + | ||
1338 | + if ((buffer[2] & 0xc0) != 0x80) | ||
1339 | + return 0; | ||
1340 | + | ||
1341 | + if ((buffer[0] & 0x10) == 0) | ||
1342 | + return 3; | ||
1343 | + | ||
1344 | + if (buflen < 4) | ||
1345 | + return 0; | ||
1346 | + | ||
1347 | + if ((buffer[3] & 0xc0) != 0x80) | ||
1348 | + return 0; | ||
1349 | + | ||
1350 | + return 4; | ||
1351 | +} | ||
1352 | + | ||
1353 | +/* Display a UTF-8 encoded character in BUFFER according to the setting | ||
1354 | + of unicode_display. The character is known to be valid. | ||
1355 | + Returns the number of bytes consumed. */ | ||
1356 | + | ||
1357 | +static unsigned int | ||
1358 | +display_utf8_char (const unsigned char * buffer) | ||
1359 | +{ | ||
1360 | + unsigned int j; | ||
1361 | + unsigned int utf8_len; | ||
1362 | + | ||
1363 | + switch (buffer[0] & 0x30) | ||
1364 | + { | ||
1365 | + case 0x00: | ||
1366 | + case 0x10: | ||
1367 | + utf8_len = 2; | ||
1368 | + break; | ||
1369 | + case 0x20: | ||
1370 | + utf8_len = 3; | ||
1371 | + break; | ||
1372 | + default: | ||
1373 | + utf8_len = 4; | ||
1374 | + } | ||
1375 | + | ||
1376 | + switch (unicode_display) | ||
1377 | + { | ||
1378 | + default: | ||
1379 | + fprintf (stderr, "ICE: unexpected unicode display type\n"); | ||
1380 | + break; | ||
1381 | + | ||
1382 | + case unicode_escape: | ||
1383 | + case unicode_highlight: | ||
1384 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1385 | + printf ("\x1B[31;47m"); /* Red. */ | ||
1386 | + | ||
1387 | + switch (utf8_len) | ||
1388 | + { | ||
1389 | + case 2: | ||
1390 | + printf ("\\u%02x%02x", | ||
1391 | + ((buffer[0] & 0x1c) >> 2), | ||
1392 | + ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f)); | ||
1393 | + break; | ||
1394 | + | ||
1395 | + case 3: | ||
1396 | + printf ("\\u%02x%02x", | ||
1397 | + ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2), | ||
1398 | + ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f))); | ||
1399 | + break; | ||
1400 | + | ||
1401 | + case 4: | ||
1402 | + printf ("\\u%02x%02x%02x", | ||
1403 | + ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2), | ||
1404 | + ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2), | ||
1405 | + ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f))); | ||
1406 | + break; | ||
1407 | + default: | ||
1408 | + /* Unreachable: utf8_len is always 2, 3 or 4 here. */ | ||
1409 | + break; | ||
1410 | + } | ||
1411 | + | ||
1412 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1413 | + printf ("\033[0m"); /* Default colour. */ | ||
1414 | + break; | ||
1415 | + | ||
1416 | + case unicode_hex: | ||
1417 | + putchar ('<'); | ||
1418 | + printf ("0x"); | ||
1419 | + for (j = 0; j < utf8_len; j++) | ||
1420 | + printf ("%02x", buffer [j]); | ||
1421 | + putchar ('>'); | ||
1422 | + break; | ||
1423 | + | ||
1424 | + case unicode_locale: | ||
1425 | + printf ("%.1s", buffer); | ||
1426 | + break; | ||
1427 | + } | ||
1428 | + | ||
1429 | + return utf8_len; | ||
1430 | +} | ||
1431 | + | ||
1432 | +/* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered | ||
1433 | + according to the setting of the unicode_display variable. The buffer | ||
1434 | + contains BUFLEN bytes. | ||
1435 | + | ||
1436 | + Display the characters as if they started at ADDRESS and are contained in | ||
1437 | + FILENAME. */ | ||
1438 | + | ||
1439 | +static void | ||
1440 | +print_unicode_buffer (const char * filename, | ||
1441 | + file_ptr address, | ||
1442 | + const unsigned char * buffer, | ||
1443 | + unsigned long buflen) | ||
1444 | +{ | ||
1445 | + /* Paranoia checks... */ | ||
1446 | + if (filename == NULL | ||
1447 | + || buffer == NULL | ||
1448 | + || unicode_display == unicode_default | ||
1449 | + || encoding != 'S' | ||
1450 | + || encoding_bytes != 1) | ||
1451 | + { | ||
1452 | + fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n"); | ||
1453 | + return; | ||
1454 | + } | ||
1455 | + | ||
1456 | + if (buflen == 0) | ||
1457 | + return; | ||
1458 | + | ||
1459 | + /* We must only display strings that are at least string_min *characters* | ||
1460 | + long. So we scan the buffer in two stages. First we locate the start | ||
1461 | + of a potential string. Then we walk along it until we have found | ||
1462 | + string_min characters. Then we go back to the start point and start | ||
1463 | + displaying characters according to the unicode_display setting. */ | ||
1464 | + | ||
1465 | + unsigned long start_point = 0; | ||
1466 | + unsigned long i = 0; | ||
1467 | + unsigned int char_len = 1; | ||
1468 | + unsigned int num_found = 0; | ||
1469 | + | ||
1470 | + for (i = 0; i < buflen; i += char_len) | ||
1471 | + { | ||
1472 | + int c = buffer[i]; | ||
1473 | + | ||
1474 | + char_len = 1; | ||
1475 | + | ||
1476 | + /* Find the first potential character of a string. */ | ||
1477 | + if (! STRING_ISGRAPHIC (c)) | ||
1478 | + { | ||
1479 | + num_found = 0; | ||
1480 | + continue; | ||
1481 | + } | ||
1482 | + | ||
1483 | + if (c > 126) | ||
1484 | + { | ||
1485 | + if (c < 0xc0) | ||
1486 | + { | ||
1487 | + num_found = 0; | ||
1488 | + continue; | ||
1489 | + } | ||
1490 | + | ||
1491 | + if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0) | ||
1492 | + { | ||
1493 | + char_len = 1; | ||
1494 | + num_found = 0; | ||
1495 | + continue; | ||
1496 | + } | ||
1497 | + | ||
1498 | + if (unicode_display == unicode_invalid) | ||
1499 | + { | ||
1500 | + /* We have found a valid UTF-8 character, but we treat it as non-graphic. */ | ||
1501 | + num_found = 0; | ||
1502 | + continue; | ||
1503 | + } | ||
1504 | + } | ||
1505 | + | ||
1506 | + if (num_found == 0) | ||
1507 | + /* We have found a potential starting point for a string. */ | ||
1508 | + start_point = i; | ||
1509 | + | ||
1510 | + ++ num_found; | ||
1511 | + | ||
1512 | + if (num_found >= string_min) | ||
1513 | + break; | ||
1514 | + } | ||
1515 | + | ||
1516 | + if (num_found < string_min) | ||
1517 | + return; | ||
1518 | + | ||
1519 | + print_filename_and_address (filename, address + start_point); | ||
1520 | + | ||
1521 | + /* We have found string_min characters. Display them and any | ||
1522 | + more that follow. */ | ||
1523 | + for (i = start_point; i < buflen; i += char_len) | ||
1524 | + { | ||
1525 | + int c = buffer[i]; | ||
1526 | + | ||
1527 | + char_len = 1; | ||
1528 | + | ||
1529 | + if (! STRING_ISGRAPHIC (c)) | ||
1530 | + break; | ||
1531 | + else if (c < 127) | ||
1532 | + putchar (c); | ||
1533 | + else if (! is_valid_utf8 (buffer + i, buflen - i)) | ||
1534 | + break; | ||
1535 | + else if (unicode_display == unicode_invalid) | ||
1536 | + break; | ||
1537 | + else | ||
1538 | + char_len = display_utf8_char (buffer + i); | ||
1539 | + } | ||
1540 | + | ||
1541 | + if (output_separator) | ||
1542 | + fputs (output_separator, stdout); | ||
1543 | + else | ||
1544 | + putchar ('\n'); | ||
1545 | + | ||
1546 | + /* FIXME: Using tail recursion here is lazy programming... */ | ||
1547 | + print_unicode_buffer (filename, address + i, buffer + i, buflen - i); | ||
1548 | +} | ||
1549 | + | ||
/* Return the next byte to be examined, or EOF if there are no more.

   Bytes that were previously put back are returned first.  PUTBACK is
   used as a stack: *NUM_PUTBACK is the number of bytes it holds and the
   byte stored most recently is handed back first.  Once the stack is
   empty a byte is read from STREAM.

   *NUM_READ is incremented for every byte actually obtained from
   STREAM.  It is not changed when a put-back byte is returned, nor
   when the read fails and EOF is returned, so it always equals the
   number of bytes consumed from STREAM through this function.  */

static int
get_unicode_byte (FILE *          stream,
                  unsigned char * putback,
                  unsigned int *  num_putback,
                  unsigned int *  num_read)
{
  int c;

  if (* num_putback > 0)
    {
      * num_putback = * num_putback - 1;
      return putback [* num_putback];
    }

#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
  c = getc_unlocked (stream);
#else
  c = getc (stream);
#endif

  /* Only count bytes that really came out of the stream.  */
  if (c != EOF)
    * num_read = * num_read + 1;

  return c;
}
1570 | + | ||
1571 | +/* Helper function for print_unicode_stream. */ | ||
1572 | + | ||
1573 | +static void | ||
1574 | +print_unicode_stream_body (const char * filename, | ||
1575 | + file_ptr address, | ||
1576 | + FILE * stream, | ||
1577 | + unsigned char * putback_buf, | ||
1578 | + unsigned int num_putback, | ||
1579 | + unsigned char * print_buf) | ||
1580 | +{ | ||
1581 | + /* It would be nice if we could just read the stream into a buffer | ||
1582 | + and then process if with print_unicode_buffer. But the input | ||
1583 | + might be huge or it might time-locked (eg stdin). So instead | ||
1584 | + we go one byte at a time... */ | ||
1585 | + | ||
1586 | + file_ptr start_point = 0; | ||
1587 | + unsigned int num_read = 0; | ||
1588 | + unsigned int num_chars = 0; | ||
1589 | + unsigned int num_print = 0; | ||
1590 | + int c; | ||
1591 | + | ||
1592 | + /* Find a series of string_min characters. Put them into print_buf. */ | ||
1593 | + do | ||
1594 | + { | ||
1595 | + if (num_chars >= string_min) | ||
1596 | + break; | ||
1597 | + | ||
1598 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1599 | + if (c == EOF) | ||
1600 | + break; | ||
1601 | + | ||
1602 | + if (! STRING_ISGRAPHIC (c)) | ||
1603 | + { | ||
1604 | + num_chars = num_print = 0; | ||
1605 | + continue; | ||
1606 | + } | ||
1607 | + | ||
1608 | + if (num_chars == 0) | ||
1609 | + start_point = num_read - 1; | ||
1610 | + | ||
1611 | + if (c < 127) | ||
1612 | + { | ||
1613 | + print_buf[num_print] = c; | ||
1614 | + num_chars ++; | ||
1615 | + num_print ++; | ||
1616 | + continue; | ||
1617 | + } | ||
1618 | + | ||
1619 | + if (c < 0xc0) | ||
1620 | + { | ||
1621 | + num_chars = num_print = 0; | ||
1622 | + continue; | ||
1623 | + } | ||
1624 | + | ||
1625 | + /* We *might* have a UTF-8 sequence. Time to start peeking. */ | ||
1626 | + char utf8[4]; | ||
1627 | + | ||
1628 | + utf8[0] = c; | ||
1629 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1630 | + if (c == EOF) | ||
1631 | + break; | ||
1632 | + utf8[1] = c; | ||
1633 | + | ||
1634 | + if ((utf8[1] & 0xc0) != 0x80) | ||
1635 | + { | ||
1636 | + /* Invalid UTF-8. */ | ||
1637 | + putback_buf[num_putback++] = utf8[1]; | ||
1638 | + num_chars = num_print = 0; | ||
1639 | + continue; | ||
1640 | + } | ||
1641 | + else if ((utf8[0] & 0x20) == 0) | ||
1642 | + { | ||
1643 | + /* A valid 2-byte UTF-8 encoding. */ | ||
1644 | + if (unicode_display == unicode_invalid) | ||
1645 | + { | ||
1646 | + putback_buf[num_putback++] = utf8[1]; | ||
1647 | + num_chars = num_print = 0; | ||
1648 | + } | ||
1649 | + else | ||
1650 | + { | ||
1651 | + print_buf[num_print ++] = utf8[0]; | ||
1652 | + print_buf[num_print ++] = utf8[1]; | ||
1653 | + num_chars ++; | ||
1654 | + } | ||
1655 | + continue; | ||
1656 | + } | ||
1657 | + | ||
1658 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1659 | + if (c == EOF) | ||
1660 | + break; | ||
1661 | + utf8[2] = c; | ||
1662 | + | ||
1663 | + if ((utf8[2] & 0xc0) != 0x80) | ||
1664 | + { | ||
1665 | + /* Invalid UTF-8. */ | ||
1666 | + putback_buf[num_putback++] = utf8[2]; | ||
1667 | + putback_buf[num_putback++] = utf8[1]; | ||
1668 | + num_chars = num_print = 0; | ||
1669 | + continue; | ||
1670 | + } | ||
1671 | + else if ((utf8[0] & 0x10) == 0) | ||
1672 | + { | ||
1673 | + /* A valid 3-byte UTF-8 encoding. */ | ||
1674 | + if (unicode_display == unicode_invalid) | ||
1675 | + { | ||
1676 | + putback_buf[num_putback++] = utf8[2]; | ||
1677 | + putback_buf[num_putback++] = utf8[1]; | ||
1678 | + num_chars = num_print = 0; | ||
1679 | + } | ||
1680 | + else | ||
1681 | + { | ||
1682 | + print_buf[num_print ++] = utf8[0]; | ||
1683 | + print_buf[num_print ++] = utf8[1]; | ||
1684 | + print_buf[num_print ++] = utf8[2]; | ||
1685 | + num_chars ++; | ||
1686 | + } | ||
1687 | + continue; | ||
1688 | + } | ||
1689 | + | ||
1690 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1691 | + if (c == EOF) | ||
1692 | + break; | ||
1693 | + utf8[3] = c; | ||
1694 | + | ||
1695 | + if ((utf8[3] & 0xc0) != 0x80) | ||
1696 | + { | ||
1697 | + /* Invalid UTF-8. */ | ||
1698 | + putback_buf[num_putback++] = utf8[3]; | ||
1699 | + putback_buf[num_putback++] = utf8[2]; | ||
1700 | + putback_buf[num_putback++] = utf8[1]; | ||
1701 | + num_chars = num_print = 0; | ||
1702 | + } | ||
1703 | + /* We have a valid 4-byte UTF-8 encoding. */ | ||
1704 | + else if (unicode_display == unicode_invalid) | ||
1705 | + { | ||
1706 | + putback_buf[num_putback++] = utf8[3]; | ||
1707 | + putback_buf[num_putback++] = utf8[1]; | ||
1708 | + putback_buf[num_putback++] = utf8[2]; | ||
1709 | + num_chars = num_print = 0; | ||
1710 | + } | ||
1711 | + else | ||
1712 | + { | ||
1713 | + print_buf[num_print ++] = utf8[0]; | ||
1714 | + print_buf[num_print ++] = utf8[1]; | ||
1715 | + print_buf[num_print ++] = utf8[2]; | ||
1716 | + print_buf[num_print ++] = utf8[3]; | ||
1717 | + num_chars ++; | ||
1718 | + } | ||
1719 | + } | ||
1720 | + while (1); | ||
1721 | + | ||
1722 | + if (num_chars >= string_min) | ||
1723 | + { | ||
1724 | + /* We know that we have string_min valid characters in print_buf, | ||
1725 | + and there may be more to come in the stream. Start displaying | ||
1726 | + them. */ | ||
1727 | + | ||
1728 | + print_filename_and_address (filename, address + start_point); | ||
1729 | + | ||
1730 | + unsigned int i; | ||
1731 | + for (i = 0; i < num_print;) | ||
1732 | + { | ||
1733 | + if (print_buf[i] < 127) | ||
1734 | + putchar (print_buf[i++]); | ||
1735 | + else | ||
1736 | + i += display_utf8_char (print_buf + i); | ||
1737 | + } | ||
1738 | + | ||
1739 | + /* OK so now we have to start read unchecked bytes. */ | ||
1740 | + | ||
1741 | + /* Find a series of string_min characters. Put them into print_buf. */ | ||
1742 | + do | ||
1743 | + { | ||
1744 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1745 | + if (c == EOF) | ||
1746 | + break; | ||
1747 | + | ||
1748 | + if (! STRING_ISGRAPHIC (c)) | ||
1749 | + break; | ||
1750 | + | ||
1751 | + if (c < 127) | ||
1752 | + { | ||
1753 | + putchar (c); | ||
1754 | + continue; | ||
1755 | + } | ||
1756 | + | ||
1757 | + if (c < 0xc0) | ||
1758 | + break; | ||
1759 | + | ||
1760 | + /* We *might* have a UTF-8 sequence. Time to start peeking. */ | ||
1761 | + unsigned char utf8[4]; | ||
1762 | + | ||
1763 | + utf8[0] = c; | ||
1764 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1765 | + if (c == EOF) | ||
1766 | + break; | ||
1767 | + utf8[1] = c; | ||
1768 | + | ||
1769 | + if ((utf8[1] & 0xc0) != 0x80) | ||
1770 | + { | ||
1771 | + /* Invalid UTF-8. */ | ||
1772 | + putback_buf[num_putback++] = utf8[1]; | ||
1773 | + break; | ||
1774 | + } | ||
1775 | + else if ((utf8[0] & 0x20) == 0) | ||
1776 | + { | ||
1777 | + /* Valid 2-byte UTF-8. */ | ||
1778 | + if (unicode_display == unicode_invalid) | ||
1779 | + { | ||
1780 | + putback_buf[num_putback++] = utf8[1]; | ||
1781 | + break; | ||
1782 | + } | ||
1783 | + else | ||
1784 | + { | ||
1785 | + (void) display_utf8_char (utf8); | ||
1786 | + continue; | ||
1787 | + } | ||
1788 | + } | ||
1789 | + | ||
1790 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1791 | + if (c == EOF) | ||
1792 | + break; | ||
1793 | + utf8[2] = c; | ||
1794 | + | ||
1795 | + if ((utf8[2] & 0xc0) != 0x80) | ||
1796 | + { | ||
1797 | + /* Invalid UTF-8. */ | ||
1798 | + putback_buf[num_putback++] = utf8[2]; | ||
1799 | + putback_buf[num_putback++] = utf8[1]; | ||
1800 | + break; | ||
1801 | + } | ||
1802 | + else if ((utf8[0] & 0x10) == 0) | ||
1803 | + { | ||
1804 | + /* Valid 3-byte UTF-8. */ | ||
1805 | + if (unicode_display == unicode_invalid) | ||
1806 | + { | ||
1807 | + putback_buf[num_putback++] = utf8[2]; | ||
1808 | + putback_buf[num_putback++] = utf8[1]; | ||
1809 | + break; | ||
1810 | + } | ||
1811 | + else | ||
1812 | + { | ||
1813 | + (void) display_utf8_char (utf8); | ||
1814 | + continue; | ||
1815 | + } | ||
1816 | + } | ||
1817 | + | ||
1818 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1819 | + if (c == EOF) | ||
1820 | + break; | ||
1821 | + utf8[3] = c; | ||
1822 | + | ||
1823 | + if ((utf8[3] & 0xc0) != 0x80) | ||
1824 | + { | ||
1825 | + /* Invalid UTF-8. */ | ||
1826 | + putback_buf[num_putback++] = utf8[3]; | ||
1827 | + putback_buf[num_putback++] = utf8[2]; | ||
1828 | + putback_buf[num_putback++] = utf8[1]; | ||
1829 | + break; | ||
1830 | + } | ||
1831 | + else if (unicode_display == unicode_invalid) | ||
1832 | + { | ||
1833 | + putback_buf[num_putback++] = utf8[3]; | ||
1834 | + putback_buf[num_putback++] = utf8[2]; | ||
1835 | + putback_buf[num_putback++] = utf8[1]; | ||
1836 | + break; | ||
1837 | + } | ||
1838 | + else | ||
1839 | + /* A valid 4-byte UTF-8 encoding. */ | ||
1840 | + (void) display_utf8_char (utf8); | ||
1841 | + } | ||
1842 | + while (1); | ||
1843 | + | ||
1844 | + if (output_separator) | ||
1845 | + fputs (output_separator, stdout); | ||
1846 | + else | ||
1847 | + putchar ('\n'); | ||
1848 | + } | ||
1849 | + | ||
1850 | + if (c != EOF) | ||
1851 | + /* FIXME: Using tail recursion here is lazy, but it works. */ | ||
1852 | + print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf); | ||
1853 | +} | ||
1854 | + | ||
1855 | +/* Display strings read in from STREAM. Treat any UTF-8 encoded characters | ||
1856 | + encountered according to the setting of the unicode_display variable. | ||
1857 | + The stream is positioned at ADDRESS and is attached to FILENAME. */ | ||
1858 | + | ||
1859 | +static void | ||
1860 | +print_unicode_stream (const char * filename, | ||
1861 | + file_ptr address, | ||
1862 | + FILE * stream) | ||
1863 | +{ | ||
1864 | + /* Paranoia checks... */ | ||
1865 | + if (filename == NULL | ||
1866 | + || stream == NULL | ||
1867 | + || unicode_display == unicode_default | ||
1868 | + || encoding != 'S' | ||
1869 | + || encoding_bytes != 1) | ||
1870 | + { | ||
1871 | + fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n"); | ||
1872 | + return; | ||
1873 | + } | ||
1874 | + | ||
1875 | + /* Allocate space for string_min 4-byte utf-8 characters. */ | ||
1876 | + unsigned char * print_buf = xmalloc ((4 * string_min) + 1); | ||
1877 | + /* We should never have to put back more than 4 bytes. */ | ||
1878 | + unsigned char putback_buf[5]; | ||
1879 | + unsigned int num_putback = 0; | ||
1880 | + | ||
1881 | + print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf); | ||
1882 | + free (print_buf); | ||
1883 | +} | ||
1884 | + | ||
1885 | |||
1886 | /* Find the strings in file FILENAME, read from STREAM. | ||
1887 | Assume that STREAM is positioned so that the next byte read | ||
1888 | is at address ADDRESS in the file. | ||
1889 | - Stop reading at address STOP_POINT in the file, if nonzero. | ||
1890 | |||
1891 | If STREAM is NULL, do not read from it. | ||
1892 | The caller can supply a buffer of characters | ||
1893 | @@ -568,20 +1239,29 @@ unget_part_char (long c, file_ptr *addre | ||
1894 | |||
1895 | static void | ||
1896 | print_strings (const char *filename, FILE *stream, file_ptr address, | ||
1897 | - int stop_point, int magiccount, char *magic) | ||
1898 | + int magiccount, char *magic) | ||
1899 | { | ||
1900 | + if (unicode_display != unicode_default) | ||
1901 | + { | ||
1902 | + if (magic != NULL) | ||
1903 | + print_unicode_buffer (filename, address, | ||
1904 | + (const unsigned char *) magic, magiccount); | ||
1905 | + | ||
1906 | + if (stream != NULL) | ||
1907 | + print_unicode_stream (filename, address, stream); | ||
1908 | + return; | ||
1909 | + } | ||
1910 | + | ||
1911 | char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1)); | ||
1912 | |||
1913 | while (1) | ||
1914 | { | ||
1915 | file_ptr start; | ||
1916 | - int i; | ||
1917 | + unsigned int i; | ||
1918 | long c; | ||
1919 | |||
1920 | /* See if the next `string_min' chars are all graphic chars. */ | ||
1921 | tryline: | ||
1922 | - if (stop_point && address >= stop_point) | ||
1923 | - break; | ||
1924 | start = address; | ||
1925 | for (i = 0; i < string_min; i++) | ||
1926 | { | ||
1927 | @@ -604,69 +1284,7 @@ print_strings (const char *filename, FIL | ||
1928 | /* We found a run of `string_min' graphic characters. Print up | ||
1929 | to the next non-graphic character. */ | ||
1930 | |||
1931 | - if (print_filenames) | ||
1932 | - printf ("%s: ", filename); | ||
1933 | - if (print_addresses) | ||
1934 | - switch (address_radix) | ||
1935 | - { | ||
1936 | - case 8: | ||
1937 | -#ifdef HAVE_LONG_LONG | ||
1938 | - if (sizeof (start) > sizeof (long)) | ||
1939 | - { | ||
1940 | -# ifndef __MSVCRT__ | ||
1941 | - printf ("%7llo ", (unsigned long long) start); | ||
1942 | -# else | ||
1943 | - printf ("%7I64o ", (unsigned long long) start); | ||
1944 | -# endif | ||
1945 | - } | ||
1946 | - else | ||
1947 | -#elif !BFD_HOST_64BIT_LONG | ||
1948 | - if (start != (unsigned long) start) | ||
1949 | - printf ("++%7lo ", (unsigned long) start); | ||
1950 | - else | ||
1951 | -#endif | ||
1952 | - printf ("%7lo ", (unsigned long) start); | ||
1953 | - break; | ||
1954 | - | ||
1955 | - case 10: | ||
1956 | -#ifdef HAVE_LONG_LONG | ||
1957 | - if (sizeof (start) > sizeof (long)) | ||
1958 | - { | ||
1959 | -# ifndef __MSVCRT__ | ||
1960 | - printf ("%7llu ", (unsigned long long) start); | ||
1961 | -# else | ||
1962 | - printf ("%7I64d ", (unsigned long long) start); | ||
1963 | -# endif | ||
1964 | - } | ||
1965 | - else | ||
1966 | -#elif !BFD_HOST_64BIT_LONG | ||
1967 | - if (start != (unsigned long) start) | ||
1968 | - printf ("++%7lu ", (unsigned long) start); | ||
1969 | - else | ||
1970 | -#endif | ||
1971 | - printf ("%7ld ", (long) start); | ||
1972 | - break; | ||
1973 | - | ||
1974 | - case 16: | ||
1975 | -#ifdef HAVE_LONG_LONG | ||
1976 | - if (sizeof (start) > sizeof (long)) | ||
1977 | - { | ||
1978 | -# ifndef __MSVCRT__ | ||
1979 | - printf ("%7llx ", (unsigned long long) start); | ||
1980 | -# else | ||
1981 | - printf ("%7I64x ", (unsigned long long) start); | ||
1982 | -# endif | ||
1983 | - } | ||
1984 | - else | ||
1985 | -#elif !BFD_HOST_64BIT_LONG | ||
1986 | - if (start != (unsigned long) start) | ||
1987 | - printf ("%lx%8.8lx ", (unsigned long) (start >> 32), | ||
1988 | - (unsigned long) (start & 0xffffffff)); | ||
1989 | - else | ||
1990 | -#endif | ||
1991 | - printf ("%7lx ", (unsigned long) start); | ||
1992 | - break; | ||
1993 | - } | ||
1994 | + print_filename_and_address (filename, start); | ||
1995 | |||
1996 | buf[i] = '\0'; | ||
1997 | fputs (buf, stdout); | ||
1998 | @@ -718,6 +1336,8 @@ usage (FILE *stream, int status) | ||
1999 | -T --target=<BFDNAME> Specify the binary file format\n\ | ||
2000 | -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\ | ||
2001 | s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\ | ||
2002 | + --unicode={default|show|invalid|hex|escape|highlight}\n\ | ||
2003 | + -u {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ | ||
2004 | -s --output-separator=<string> String used to separate strings in output.\n\ | ||
2005 | @<file> Read options from <file>\n\ | ||
2006 | -h --help Display this information\n\ | ||