diff options
author | pgowda <pgowda.cve@gmail.com> | 2021-12-20 01:50:09 -0800 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2021-12-22 23:11:45 +0000 |
commit | 3cb504cebafcbf6490c049efb5acd8e2fa4e95ec (patch) | |
tree | ed7967cbbae8652020d3b4533fc04e2722c364fe | |
parent | 3503555a8b3595ec6ab4d836176ca5309f32912c (diff) | |
download | poky-3cb504cebafcbf6490c049efb5acd8e2fa4e95ec.tar.gz |
binutils: CVE-2021-42574
Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5]
[RP: Merge uint -> unsigned int change]
(From OE-Core rev: fa242a41f3436f1d73eabee335573c1801bf7888)
Signed-off-by: pgowda <pgowda.cve@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | meta/recipes-devtools/binutils/binutils-2.37.inc | 1 | ||||
-rw-r--r-- | meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch | 2001 |
2 files changed, 2002 insertions, 0 deletions
diff --git a/meta/recipes-devtools/binutils/binutils-2.37.inc b/meta/recipes-devtools/binutils/binutils-2.37.inc index fca4a80ad2..043f7f8235 100644 --- a/meta/recipes-devtools/binutils/binutils-2.37.inc +++ b/meta/recipes-devtools/binutils/binutils-2.37.inc | |||
@@ -33,5 +33,6 @@ SRC_URI = "\ | |||
33 | file://0016-Check-for-clang-before-checking-gcc-version.patch \ | 33 | file://0016-Check-for-clang-before-checking-gcc-version.patch \ |
34 | file://0017-bfd-Close-the-file-descriptor-if-there-is-no-archive.patch \ | 34 | file://0017-bfd-Close-the-file-descriptor-if-there-is-no-archive.patch \ |
35 | file://0001-elf-Discard-input-.note.gnu.build-id-sections.patch \ | 35 | file://0001-elf-Discard-input-.note.gnu.build-id-sections.patch \ |
36 | file://0001-CVE-2021-42574.patch \ | ||
36 | " | 37 | " |
37 | S = "${WORKDIR}/git" | 38 | S = "${WORKDIR}/git" |
diff --git a/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch b/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch new file mode 100644 index 0000000000..0622ae389e --- /dev/null +++ b/meta/recipes-devtools/binutils/binutils/0001-CVE-2021-42574.patch | |||
@@ -0,0 +1,2001 @@ | |||
1 | From b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5 Mon Sep 17 00:00:00 2001 | ||
2 | From: Nick Clifton <nickc@redhat.com> | ||
3 | Date: Tue, 9 Nov 2021 13:25:42 +0000 | ||
4 | Subject: [PATCH] Add --unicode option to control how unicode characters are | ||
5 | handled by display tools. | ||
6 | |||
7 | * nm.c: Add --unicode option to control how unicode characters are | ||
8 | handled. | ||
9 | * objdump.c: Likewise. | ||
10 | * readelf.c: Likewise. | ||
11 | * strings.c: Likewise. | ||
12 | * binutils.texi: Document the new feature. | ||
13 | * NEWS: Document the new feature. | ||
14 | * testsuite/binutils-all/unicode.exp: New file. | ||
15 | * testsuite/binutils-all/nm.hex.unicode | ||
16 | * testsuite/binutils-all/strings.escape.unicode | ||
17 | * testsuite/binutils-all/objdump.highlight.unicode | ||
18 | * testsuite/binutils-all/readelf.invalid.unicode | ||
19 | |||
20 | CVE: CVE-2021-42574 | ||
21 | Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=b3aa80b45c4f46029efeb204bb9f2d2c4278a0e5] | ||
22 | |||
23 | RP: Added tweak uint -> unsigned int partial backport of | ||
24 | https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=795588aec4f894206863c938bd6d716895886009 | ||
25 | |||
26 | Signed-off-by: pgowda <pgowda.cve@gmail.com> | ||
27 | --- | ||
28 | binutils/ChangeLog | 15 + | ||
29 | binutils/NEWS | 9 + | ||
30 | binutils/doc/binutils.texi | 78 ++++ | ||
31 | binutils/nm.c | 228 ++++++++++- | ||
32 | binutils/objdump.c | 235 ++++++++++-- | ||
33 | binutils/readelf.c | 190 +++++++++- | ||
34 | binutils/strings.c | 757 ++++++++++++++++++++++++++++++++++--- | ||
35 | 7 files changed, 1409 insertions(+), 103 deletions(-) | ||
36 | |||
37 | diff --git a/binutils/ChangeLog b/binutils/ChangeLog | ||
38 | --- a/binutils/ChangeLog 2021-12-19 19:00:27.038540406 -0800 | ||
39 | +++ b/binutils/ChangeLog 2021-12-19 19:28:42.733565078 -0800 | ||
40 | @@ -1,3 +1,18 @@ | ||
41 | +2021-11-09 Nick Clifton <nickc@redhat.com> | ||
42 | + | ||
43 | + * nm.c: Add --unicode option to control how unicode characters are | ||
44 | + handled. | ||
45 | + * objdump.c: Likewise. | ||
46 | + * readelf.c: Likewise. | ||
47 | + * strings.c: Likewise. | ||
48 | + * binutils.texi: Document the new feature. | ||
49 | + * NEWS: Document the new feature. | ||
50 | + * testsuite/binutils-all/unicode.exp: New file. | ||
51 | + * testsuite/binutils-all/nm.hex.unicode | ||
52 | + * testsuite/binutils-all/strings.escape.unicode | ||
53 | + * testsuite/binutils-all/objdump.highlight.unicode | ||
54 | + * testsuite/binutils-all/readelf.invalid.unicode | ||
55 | + | ||
56 | 2021-07-16 Nick Clifton <nickc@redhat.com> | ||
57 | |||
58 | * po/sv.po: Updated Swedish translation. | ||
59 | diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi | ||
60 | --- a/binutils/doc/binutils.texi 2021-12-19 19:00:27.042540338 -0800 | ||
61 | +++ b/binutils/doc/binutils.texi 2021-12-19 19:27:56.526354667 -0800 | ||
62 | @@ -812,6 +812,7 @@ nm [@option{-A}|@option{-o}|@option{--pr | ||
63 | [@option{-s}|@option{--print-armap}] | ||
64 | [@option{-t} @var{radix}|@option{--radix=}@var{radix}] | ||
65 | [@option{-u}|@option{--undefined-only}] | ||
66 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
67 | [@option{-V}|@option{--version}] | ||
68 | [@option{-X 32_64}] | ||
69 | [@option{--defined-only}] | ||
70 | @@ -1132,6 +1133,21 @@ Use @var{radix} as the radix for printin | ||
71 | @cindex undefined symbols | ||
72 | Display only undefined symbols (those external to each object file). | ||
73 | |||
74 | +@item -U @var{[d|i|l|e|x|h]} | ||
75 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
76 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
77 | +The default (@option{--unicode=default}) is to give them no special | ||
78 | +treatment. The @option{--unicode=locale} option displays the sequence | ||
79 | +in the current locale, which may or may not support them. The options | ||
80 | +@option{--unicode=hex} and @option{--unicode=invalid} display them as | ||
81 | +hex byte sequences enclosed by either angle brackets or curly braces. | ||
82 | + | ||
83 | +The @option{--unicode=escape} option displays them as escape sequences | ||
84 | +(@var{\uxxxx}) and the @option{--unicode=highlight} option displays | ||
85 | +them as escape sequences highlighted in red (if supported by the | ||
86 | +output device). The colouring is intended to draw attention to the | ||
87 | +presence of unicode sequences where they might not be expected. | ||
88 | + | ||
89 | @item -V | ||
90 | @itemx --version | ||
91 | Show the version number of @command{nm} and exit. | ||
92 | @@ -2247,6 +2263,7 @@ objdump [@option{-a}|@option{--archive-h | ||
93 | [@option{--prefix-strip=}@var{level}] | ||
94 | [@option{--insn-width=}@var{width}] | ||
95 | [@option{--visualize-jumps[=color|=extended-color|=off]} | ||
96 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
97 | [@option{-V}|@option{--version}] | ||
98 | [@option{-H}|@option{--help}] | ||
99 | @var{objfile}@dots{} | ||
100 | @@ -2921,6 +2938,21 @@ When displaying symbols include those wh | ||
101 | special in some way and which would not normally be of interest to the | ||
102 | user. | ||
103 | |||
104 | +@item -U @var{[d|i|l|e|x|h]} | ||
105 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
106 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
107 | +The default (@option{--unicode=default}) is to give them no special | ||
108 | +treatment. The @option{--unicode=locale} option displays the sequence | ||
109 | +in the current locale, which may or may not support them. The options | ||
110 | +@option{--unicode=hex} and @option{--unicode=invalid} display them as | ||
111 | +hex byte sequences enclosed by either angle brackets or curly braces. | ||
112 | + | ||
113 | +The @option{--unicode=escape} option displays them as escape sequences | ||
114 | +(@var{\uxxxx}) and the @option{--unicode=highlight} option displays | ||
115 | +them as escape sequences highlighted in red (if supported by the | ||
116 | +output device). The colouring is intended to draw attention to the | ||
117 | +presence of unicode sequences where they might not be expected. | ||
118 | + | ||
119 | @item -V | ||
120 | @itemx --version | ||
121 | Print the version number of @command{objdump} and exit. | ||
122 | @@ -3197,6 +3229,7 @@ strings [@option{-afovV}] [@option{-}@va | ||
123 | [@option{-n} @var{min-len}] [@option{--bytes=}@var{min-len}] | ||
124 | [@option{-t} @var{radix}] [@option{--radix=}@var{radix}] | ||
125 | [@option{-e} @var{encoding}] [@option{--encoding=}@var{encoding}] | ||
126 | + [@option{-U} @var{method}] [@option{--unicode=}@var{method}] | ||
127 | [@option{-}] [@option{--all}] [@option{--print-file-name}] | ||
128 | [@option{-T} @var{bfdname}] [@option{--target=}@var{bfdname}] | ||
129 | [@option{-w}] [@option{--include-all-whitespace}] | ||
130 | @@ -3288,6 +3321,28 @@ single-8-bit-byte characters, @samp{b} = | ||
131 | littleendian. Useful for finding wide character strings. (@samp{l} | ||
132 | and @samp{b} apply to, for example, Unicode UTF-16/UCS-2 encodings). | ||
133 | |||
134 | +@item -U @var{[d|i|l|e|x|h]} | ||
135 | +@itemx --unicode=@var{[default|invalid|locale|escape|hex|highlight]} | ||
136 | +Controls the display of UTF-8 encoded multibyte characters in strings. | ||
137 | +The default (@option{--unicode=default}) is to give them no special | ||
138 | +treatment, and instead rely upon the setting of the | ||
139 | +@option{--encoding} option. The other values for this option | ||
140 | +automatically enable @option{--encoding=S}. | ||
141 | + | ||
142 | +The @option{--unicode=invalid} option treats them as non-graphic | ||
143 | +characters and hence not part of a valid string. All the remaining | ||
144 | +options treat them as valid string characters. | ||
145 | + | ||
146 | +The @option{--unicode=locale} option displays them in the current | ||
147 | +locale, which may or may not support UTF-8 encoding. The | ||
148 | +@option{--unicode=hex} option displays them as hex byte sequences | ||
149 | +enclosed between @var{<>} characters. The @option{--unicode=escape} | ||
150 | +option displays them as escape sequences (@var{\uxxxx}) and the | ||
151 | +@option{--unicode=highlight} option displays them as escape sequences | ||
152 | +highlighted in red (if supported by the output device). The colouring | ||
153 | +is intended to draw attention to the presence of unicode sequences | ||
154 | +where they might not be expected. | ||
155 | + | ||
156 | @item -T @var{bfdname} | ||
157 | @itemx --target=@var{bfdname} | ||
158 | @cindex object code format | ||
159 | @@ -4796,6 +4851,7 @@ readelf [@option{-a}|@option{--all}] | ||
160 | [@option{--demangle@var{=style}}|@option{--no-demangle}] | ||
161 | [@option{--quiet}] | ||
162 | [@option{--recurse-limit}|@option{--no-recurse-limit}] | ||
163 | + [@option{-U} @var{method}|@option{--unicode=}@var{method}] | ||
164 | [@option{-n}|@option{--notes}] | ||
165 | [@option{-r}|@option{--relocs}] | ||
166 | [@option{-u}|@option{--unwind}] | ||
167 | @@ -4962,6 +5018,28 @@ necessary in order to demangle truly com | ||
168 | that if the recursion limit is disabled then stack exhaustion is | ||
169 | possible and any bug reports about such an event will be rejected. | ||
170 | |||
171 | +@item -U @var{[d|i|l|e|x|h]} | ||
172 | +@itemx --unicode=[default|invalid|locale|escape|hex|highlight] | ||
173 | +Controls the display of non-ASCII characters in identifier names. | ||
174 | +The default (@option{--unicode=locale} or @option{--unicode=default}) is | ||
175 | +to treat them as multibyte characters and display them in the current | ||
176 | +locale. All other versions of this option treat the bytes as UTF-8 | ||
177 | +encoded values and attempt to interpret them. If they cannot be | ||
178 | +interpreted or if the @option{--unicode=invalid} option is used then | ||
179 | +they are displayed as a sequence of hex bytes, enclosed in curly | ||
180 | +braces. | ||
181 | + | ||
182 | +Using the @option{--unicode=escape} option will display the characters | ||
183 | +as unicode escape sequences (@var{\uxxxx}). Using the | ||
184 | +@option{--unicode=hex} option will display the characters as hex byte | ||
185 | +sequences enclosed between angle brackets. | ||
186 | + | ||
187 | +Using the @option{--unicode=highlight} option will display the characters as | ||
188 | +unicode escape sequences but it will also highlight them in red, | ||
189 | +assuming that colouring is supported by the output device. The | ||
190 | +colouring is intended to draw attention to the presence of unicode | ||
191 | +sequences when they might not be expected. | ||
192 | + | ||
193 | @item -e | ||
194 | @itemx --headers | ||
195 | Display all the headers in the file. Equivalent to @option{-h -l -S}. | ||
196 | diff --git a/binutils/NEWS b/binutils/NEWS | ||
197 | --- a/binutils/NEWS 2021-12-19 19:00:27.038540406 -0800 | ||
198 | +++ b/binutils/NEWS 2021-12-19 19:30:04.764162972 -0800 | ||
199 | @@ -1,5 +1,14 @@ | ||
200 | -*- text -*- | ||
201 | |||
202 | +* Tools which display symbols or strings (readelf, strings, nm, objdump) | ||
203 | + have a new command line option which controls how unicode characters are | ||
204 | + handled. By default they are treated as normal for the tool. Using | ||
205 | + --unicode=locale will display them according to the current locale. | ||
206 | + Using --unicode=hex will display them as hex byte values, whilst | ||
207 | + --unicode=escape will display them as escape sequences. In addition | ||
208 | + using --unicode=highlight will display them as unicode escape sequences | ||
209 | + highlighted in red (if supported by the output device). | ||
210 | + | ||
211 | Changes in 2.37: | ||
212 | |||
213 | * The readelf tool has a new command line option which can be used to specify | ||
214 | diff --git a/binutils/nm.c b/binutils/nm.c | ||
215 | --- a/binutils/nm.c 2021-12-19 19:00:27.046540270 -0800 | ||
216 | +++ b/binutils/nm.c 2021-12-19 19:36:34.797491555 -0800 | ||
217 | @@ -38,6 +38,11 @@ | ||
218 | #include "bucomm.h" | ||
219 | #include "plugin-api.h" | ||
220 | #include "plugin.h" | ||
221 | +#include "safe-ctype.h" | ||
222 | + | ||
223 | +#ifndef streq | ||
224 | +#define streq(a,b) (strcmp ((a),(b)) == 0) | ||
225 | +#endif | ||
226 | |||
227 | /* When sorting by size, we use this structure to hold the size and a | ||
228 | pointer to the minisymbol. */ | ||
229 | @@ -216,6 +221,18 @@ static const char *plugin_target = NULL; | ||
230 | static bfd *lineno_cache_bfd; | ||
231 | static bfd *lineno_cache_rel_bfd; | ||
232 | |||
233 | +typedef enum unicode_display_type | ||
234 | +{ | ||
235 | + unicode_default = 0, | ||
236 | + unicode_locale, | ||
237 | + unicode_escape, | ||
238 | + unicode_hex, | ||
239 | + unicode_highlight, | ||
240 | + unicode_invalid | ||
241 | +} unicode_display_type; | ||
242 | + | ||
243 | +static unicode_display_type unicode_display = unicode_default; | ||
244 | + | ||
245 | enum long_option_values | ||
246 | { | ||
247 | OPTION_TARGET = 200, | ||
248 | @@ -260,6 +277,7 @@ static struct option long_options[] = | ||
249 | {"target", required_argument, 0, OPTION_TARGET}, | ||
250 | {"defined-only", no_argument, &defined_only, 1}, | ||
251 | {"undefined-only", no_argument, &undefined_only, 1}, | ||
252 | + {"unicode", required_argument, NULL, 'U'}, | ||
253 | {"version", no_argument, &show_version, 1}, | ||
254 | {"with-symbol-versions", no_argument, &with_symbol_versions, 1}, | ||
255 | {"without-symbol-versions", no_argument, &with_symbol_versions, 0}, | ||
256 | @@ -313,6 +331,8 @@ usage (FILE *stream, int status) | ||
257 | -t, --radix=RADIX Use RADIX for printing symbol values\n\ | ||
258 | --target=BFDNAME Specify the target object format as BFDNAME\n\ | ||
259 | -u, --undefined-only Display only undefined symbols\n\ | ||
260 | + -U {d|l|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ | ||
261 | + --unicode={default|locale|invalid|hex|escape|highlight}\n\ | ||
262 | --with-symbol-versions Display version strings after symbol names\n\ | ||
263 | -X 32_64 (ignored)\n\ | ||
264 | @FILE Read options from FILE\n\ | ||
265 | @@ -432,6 +452,187 @@ get_coff_symbol_type (const struct inter | ||
266 | return bufp; | ||
267 | } | ||
268 | |||
269 | +/* Convert a potential UTF-8 encoded sequence in IN into characters in OUT. | ||
270 | + The conversion format is controlled by the unicode_display variable. | ||
271 | + Returns the number of characters added to OUT. | ||
272 | + Returns the number of bytes consumed from IN in CONSUMED. | ||
273 | + Always consumes at least one byte and displays at least one character. */ | ||
274 | + | ||
275 | +static unsigned int | ||
276 | +display_utf8 (const unsigned char * in, char * out, unsigned int * consumed) | ||
277 | +{ | ||
278 | + char * orig_out = out; | ||
279 | + unsigned int nchars = 0; | ||
280 | + unsigned int j; | ||
281 | + | ||
282 | + if (unicode_display == unicode_default) | ||
283 | + goto invalid; | ||
284 | + | ||
285 | + if (in[0] < 0xc0) | ||
286 | + goto invalid; | ||
287 | + | ||
288 | + if ((in[1] & 0xc0) != 0x80) | ||
289 | + goto invalid; | ||
290 | + | ||
291 | + if ((in[0] & 0x20) == 0) | ||
292 | + { | ||
293 | + nchars = 2; | ||
294 | + goto valid; | ||
295 | + } | ||
296 | + | ||
297 | + if ((in[2] & 0xc0) != 0x80) | ||
298 | + goto invalid; | ||
299 | + | ||
300 | + if ((in[0] & 0x10) == 0) | ||
301 | + { | ||
302 | + nchars = 3; | ||
303 | + goto valid; | ||
304 | + } | ||
305 | + | ||
306 | + if ((in[3] & 0xc0) != 0x80) | ||
307 | + goto invalid; | ||
308 | + | ||
309 | + nchars = 4; | ||
310 | + | ||
311 | + valid: | ||
312 | + switch (unicode_display) | ||
313 | + { | ||
314 | + case unicode_locale: | ||
315 | + /* Copy the bytes into the output buffer as is. */ | ||
316 | + memcpy (out, in, nchars); | ||
317 | + out += nchars; | ||
318 | + break; | ||
319 | + | ||
320 | + case unicode_invalid: | ||
321 | + case unicode_hex: | ||
322 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '<' : '{'); | ||
323 | + out += sprintf (out, "0x"); | ||
324 | + for (j = 0; j < nchars; j++) | ||
325 | + out += sprintf (out, "%02x", in [j]); | ||
326 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '>' : '}'); | ||
327 | + break; | ||
328 | + | ||
329 | + case unicode_highlight: | ||
330 | + if (isatty (1)) | ||
331 | + out += sprintf (out, "\x1B[31;47m"); /* Red. */ | ||
332 | + /* Fall through. */ | ||
333 | + case unicode_escape: | ||
334 | + switch (nchars) | ||
335 | + { | ||
336 | + case 2: | ||
337 | + out += sprintf (out, "\\u%02x%02x", | ||
338 | + ((in[0] & 0x1c) >> 2), | ||
339 | + ((in[0] & 0x03) << 6) | (in[1] & 0x3f)); | ||
340 | + break; | ||
341 | + | ||
342 | + case 3: | ||
343 | + out += sprintf (out, "\\u%02x%02x", | ||
344 | + ((in[0] & 0x0f) << 4) | ((in[1] & 0x3c) >> 2), | ||
345 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3f))); | ||
346 | + break; | ||
347 | + | ||
348 | + case 4: | ||
349 | + out += sprintf (out, "\\u%02x%02x%02x", | ||
350 | + ((in[0] & 0x07) << 6) | ((in[1] & 0x3c) >> 2), | ||
351 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3c) >> 2), | ||
352 | + ((in[2] & 0x03) << 6) | ((in[3] & 0x3f))); | ||
353 | + break; | ||
354 | + default: | ||
355 | + /* URG. */ | ||
356 | + break; | ||
357 | + } | ||
358 | + | ||
359 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
360 | + out += sprintf (out, "\033[0m"); /* Default colour. */ | ||
361 | + break; | ||
362 | + | ||
363 | + default: | ||
364 | + /* URG */ | ||
365 | + break; | ||
366 | + } | ||
367 | + | ||
368 | + * consumed = nchars; | ||
369 | + return out - orig_out; | ||
370 | + | ||
371 | + invalid: | ||
372 | + /* Not a valid UTF-8 sequence. */ | ||
373 | + *out = *in; | ||
374 | + * consumed = 1; | ||
375 | + return 1; | ||
376 | +} | ||
377 | + | ||
378 | +/* Convert any UTF-8 encoded characters in NAME into the form specified by | ||
379 | + unicode_display. Also converts control characters. Returns a static | ||
380 | + buffer if conversion was necessary. | ||
381 | + Code stolen from objdump.c:sanitize_string(). */ | ||
382 | + | ||
383 | +static const char * | ||
384 | +convert_utf8 (const char * in) | ||
385 | +{ | ||
386 | + static char * buffer = NULL; | ||
387 | + static size_t buffer_len = 0; | ||
388 | + const char * original = in; | ||
389 | + char * out; | ||
390 | + | ||
391 | + /* Paranoia. */ | ||
392 | + if (in == NULL) | ||
393 | + return ""; | ||
394 | + | ||
395 | + /* See if any conversion is necessary. | ||
396 | + In the majority of cases it will not be needed. */ | ||
397 | + do | ||
398 | + { | ||
399 | + unsigned char c = *in++; | ||
400 | + | ||
401 | + if (c == 0) | ||
402 | + return original; | ||
403 | + | ||
404 | + if (ISCNTRL (c)) | ||
405 | + break; | ||
406 | + | ||
407 | + if (unicode_display != unicode_default && c >= 0xc0) | ||
408 | + break; | ||
409 | + } | ||
410 | + while (1); | ||
411 | + | ||
412 | + /* Copy the input, translating as needed. */ | ||
413 | + in = original; | ||
414 | + if (buffer_len < (strlen (in) * 9)) | ||
415 | + { | ||
416 | + free ((void *) buffer); | ||
417 | + buffer_len = strlen (in) * 9; | ||
418 | + buffer = xmalloc (buffer_len + 1); | ||
419 | + } | ||
420 | + | ||
421 | + out = buffer; | ||
422 | + do | ||
423 | + { | ||
424 | + unsigned char c = *in++; | ||
425 | + | ||
426 | + if (c == 0) | ||
427 | + break; | ||
428 | + | ||
429 | + if (ISCNTRL (c)) | ||
430 | + { | ||
431 | + *out++ = '^'; | ||
432 | + *out++ = c + 0x40; | ||
433 | + } | ||
434 | + else if (unicode_display != unicode_default && c >= 0xc0) | ||
435 | + { | ||
436 | + unsigned int num_consumed; | ||
437 | + | ||
438 | + out += display_utf8 ((const unsigned char *)(in - 1), out, & num_consumed); | ||
439 | + in += num_consumed - 1; | ||
440 | + } | ||
441 | + else | ||
442 | + *out++ = c; | ||
443 | + } | ||
444 | + while (1); | ||
445 | + | ||
446 | + *out = 0; | ||
447 | + return buffer; | ||
448 | +} | ||
449 | + | ||
450 | /* Print symbol name NAME, read from ABFD, with printf format FORM, | ||
451 | demangling it if requested. */ | ||
452 | |||
453 | @@ -444,6 +645,7 @@ print_symname (const char *form, struct | ||
454 | |||
455 | if (name == NULL) | ||
456 | name = info->sinfo->name; | ||
457 | + | ||
458 | if (!with_symbol_versions | ||
459 | && bfd_get_flavour (abfd) == bfd_target_elf_flavour) | ||
460 | { | ||
461 | @@ -451,6 +653,7 @@ print_symname (const char *form, struct | ||
462 | if (atver) | ||
463 | *atver = 0; | ||
464 | } | ||
465 | + | ||
466 | if (do_demangle && *name) | ||
467 | { | ||
468 | alloc = bfd_demangle (abfd, name, demangle_flags); | ||
469 | @@ -458,6 +661,11 @@ print_symname (const char *form, struct | ||
470 | name = alloc; | ||
471 | } | ||
472 | |||
473 | + if (unicode_display != unicode_default) | ||
474 | + { | ||
475 | + name = convert_utf8 (name); | ||
476 | + } | ||
477 | + | ||
478 | if (info != NULL && info->elfinfo && with_symbol_versions) | ||
479 | { | ||
480 | const char *version_string; | ||
481 | @@ -1807,7 +2015,7 @@ main (int argc, char **argv) | ||
482 | fatal (_("fatal error: libbfd ABI mismatch")); | ||
483 | set_default_bfd_target (); | ||
484 | |||
485 | - while ((c = getopt_long (argc, argv, "aABCDef:gHhjJlnopPrSst:uvVvX:", | ||
486 | + while ((c = getopt_long (argc, argv, "aABCDef:gHhjJlnopPrSst:uU:vVvX:", | ||
487 | long_options, (int *) 0)) != EOF) | ||
488 | { | ||
489 | switch (c) | ||
490 | @@ -1900,6 +2108,24 @@ main (int argc, char **argv) | ||
491 | case 'u': | ||
492 | undefined_only = 1; | ||
493 | break; | ||
494 | + | ||
495 | + case 'U': | ||
496 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
497 | + unicode_display = unicode_default; | ||
498 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
499 | + unicode_display = unicode_locale; | ||
500 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
501 | + unicode_display = unicode_escape; | ||
502 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
503 | + unicode_display = unicode_invalid; | ||
504 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
505 | + unicode_display = unicode_hex; | ||
506 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
507 | + unicode_display = unicode_highlight; | ||
508 | + else | ||
509 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
510 | + break; | ||
511 | + | ||
512 | case 'V': | ||
513 | show_version = 1; | ||
514 | break; | ||
515 | diff --git a/binutils/objdump.c b/binutils/objdump.c | ||
516 | --- a/binutils/objdump.c 2021-12-19 19:00:27.046540270 -0800 | ||
517 | +++ b/binutils/objdump.c 2021-12-19 19:43:09.438736729 -0800 | ||
518 | @@ -204,6 +204,18 @@ static const struct objdump_private_desc | ||
519 | |||
520 | /* The list of detected jumps inside a function. */ | ||
521 | static struct jump_info *detected_jumps = NULL; | ||
522 | + | ||
523 | +typedef enum unicode_display_type | ||
524 | +{ | ||
525 | + unicode_default = 0, | ||
526 | + unicode_locale, | ||
527 | + unicode_escape, | ||
528 | + unicode_hex, | ||
529 | + unicode_highlight, | ||
530 | + unicode_invalid | ||
531 | +} unicode_display_type; | ||
532 | + | ||
533 | +static unicode_display_type unicode_display = unicode_default; | ||
534 | |||
535 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; | ||
536 | static void | ||
537 | @@ -330,6 +342,9 @@ usage (FILE *stream, int status) | ||
538 | fprintf (stream, _("\ | ||
539 | -w, --wide Format output for more than 80 columns\n")); | ||
540 | fprintf (stream, _("\ | ||
541 | + -U[d|l|i|x|e|h] Controls the display of UTF-8 unicode characters\n\ | ||
542 | + --unicode=[default|locale|invalid|hex|escape|highlight]\n")); | ||
543 | + fprintf (stream, _("\ | ||
544 | -z, --disassemble-zeroes Do not skip blocks of zeroes when disassembling\n")); | ||
545 | fprintf (stream, _("\ | ||
546 | --start-address=ADDR Only process data whose address is >= ADDR\n")); | ||
547 | @@ -420,17 +435,23 @@ static struct option long_options[]= | ||
548 | { | ||
549 | {"adjust-vma", required_argument, NULL, OPTION_ADJUST_VMA}, | ||
550 | {"all-headers", no_argument, NULL, 'x'}, | ||
551 | - {"private-headers", no_argument, NULL, 'p'}, | ||
552 | - {"private", required_argument, NULL, 'P'}, | ||
553 | {"architecture", required_argument, NULL, 'm'}, | ||
554 | {"archive-headers", no_argument, NULL, 'a'}, | ||
555 | +#ifdef ENABLE_LIBCTF | ||
556 | + {"ctf", required_argument, NULL, OPTION_CTF}, | ||
557 | + {"ctf-parent", required_argument, NULL, OPTION_CTF_PARENT}, | ||
558 | +#endif | ||
559 | {"debugging", no_argument, NULL, 'g'}, | ||
560 | {"debugging-tags", no_argument, NULL, 'e'}, | ||
561 | {"demangle", optional_argument, NULL, 'C'}, | ||
562 | {"disassemble", optional_argument, NULL, 'd'}, | ||
563 | {"disassemble-all", no_argument, NULL, 'D'}, | ||
564 | - {"disassembler-options", required_argument, NULL, 'M'}, | ||
565 | {"disassemble-zeroes", no_argument, NULL, 'z'}, | ||
566 | + {"disassembler-options", required_argument, NULL, 'M'}, | ||
567 | + {"dwarf", optional_argument, NULL, OPTION_DWARF}, | ||
568 | + {"dwarf-check", no_argument, 0, OPTION_DWARF_CHECK}, | ||
569 | + {"dwarf-depth", required_argument, 0, OPTION_DWARF_DEPTH}, | ||
570 | + {"dwarf-start", required_argument, 0, OPTION_DWARF_START}, | ||
571 | {"dynamic-reloc", no_argument, NULL, 'R'}, | ||
572 | {"dynamic-syms", no_argument, NULL, 'T'}, | ||
573 | {"endian", required_argument, NULL, OPTION_ENDIAN}, | ||
574 | @@ -440,16 +461,23 @@ static struct option long_options[]= | ||
575 | {"full-contents", no_argument, NULL, 's'}, | ||
576 | {"headers", no_argument, NULL, 'h'}, | ||
577 | {"help", no_argument, NULL, 'H'}, | ||
578 | + {"include", required_argument, NULL, 'I'}, | ||
579 | {"info", no_argument, NULL, 'i'}, | ||
580 | + {"inlines", no_argument, 0, OPTION_INLINES}, | ||
581 | + {"insn-width", required_argument, NULL, OPTION_INSN_WIDTH}, | ||
582 | {"line-numbers", no_argument, NULL, 'l'}, | ||
583 | - {"no-show-raw-insn", no_argument, &show_raw_insn, -1}, | ||
584 | {"no-addresses", no_argument, &no_addresses, 1}, | ||
585 | - {"process-links", no_argument, &process_links, true}, | ||
586 | + {"no-recurse-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
587 | + {"no-recursion-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
588 | + {"no-show-raw-insn", no_argument, &show_raw_insn, -1}, | ||
589 | + {"prefix", required_argument, NULL, OPTION_PREFIX}, | ||
590 | {"prefix-addresses", no_argument, &prefix_addresses, 1}, | ||
591 | + {"prefix-strip", required_argument, NULL, OPTION_PREFIX_STRIP}, | ||
592 | + {"private", required_argument, NULL, 'P'}, | ||
593 | + {"private-headers", no_argument, NULL, 'p'}, | ||
594 | + {"process-links", no_argument, &process_links, true}, | ||
595 | {"recurse-limit", no_argument, NULL, OPTION_RECURSE_LIMIT}, | ||
596 | {"recursion-limit", no_argument, NULL, OPTION_RECURSE_LIMIT}, | ||
597 | - {"no-recurse-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
598 | - {"no-recursion-limit", no_argument, NULL, OPTION_NO_RECURSE_LIMIT}, | ||
599 | {"reloc", no_argument, NULL, 'r'}, | ||
600 | {"section", required_argument, NULL, 'j'}, | ||
601 | {"section-headers", no_argument, NULL, 'h'}, | ||
602 | @@ -457,28 +485,16 @@ static struct option long_options[]= | ||
603 | {"source", no_argument, NULL, 'S'}, | ||
604 | {"source-comment", optional_argument, NULL, OPTION_SOURCE_COMMENT}, | ||
605 | {"special-syms", no_argument, &dump_special_syms, 1}, | ||
606 | - {"include", required_argument, NULL, 'I'}, | ||
607 | - {"dwarf", optional_argument, NULL, OPTION_DWARF}, | ||
608 | -#ifdef ENABLE_LIBCTF | ||
609 | - {"ctf", required_argument, NULL, OPTION_CTF}, | ||
610 | - {"ctf-parent", required_argument, NULL, OPTION_CTF_PARENT}, | ||
611 | -#endif | ||
612 | {"stabs", no_argument, NULL, 'G'}, | ||
613 | {"start-address", required_argument, NULL, OPTION_START_ADDRESS}, | ||
614 | {"stop-address", required_argument, NULL, OPTION_STOP_ADDRESS}, | ||
615 | {"syms", no_argument, NULL, 't'}, | ||
616 | {"target", required_argument, NULL, 'b'}, | ||
617 | + {"unicode", required_argument, NULL, 'U'}, | ||
618 | {"version", no_argument, NULL, 'V'}, | ||
619 | - {"wide", no_argument, NULL, 'w'}, | ||
620 | - {"prefix", required_argument, NULL, OPTION_PREFIX}, | ||
621 | - {"prefix-strip", required_argument, NULL, OPTION_PREFIX_STRIP}, | ||
622 | - {"insn-width", required_argument, NULL, OPTION_INSN_WIDTH}, | ||
623 | - {"dwarf-depth", required_argument, 0, OPTION_DWARF_DEPTH}, | ||
624 | - {"dwarf-start", required_argument, 0, OPTION_DWARF_START}, | ||
625 | - {"dwarf-check", no_argument, 0, OPTION_DWARF_CHECK}, | ||
626 | - {"inlines", no_argument, 0, OPTION_INLINES}, | ||
627 | {"visualize-jumps", optional_argument, 0, OPTION_VISUALIZE_JUMPS}, | ||
628 | - {0, no_argument, 0, 0} | ||
629 | + {"wide", no_argument, NULL, 'w'}, | ||
630 | + {NULL, no_argument, NULL, 0} | ||
631 | }; | ||
632 | |||
633 | static void | ||
634 | @@ -488,9 +504,121 @@ nonfatal (const char *msg) | ||
635 | exit_status = 1; | ||
636 | } | ||
637 | |||
638 | +/* Convert a potential UTF-8 encoded sequence in IN into characters in OUT. | ||
639 | + The conversion format is controlled by the unicode_display variable. | ||
640 | + Returns the number of characters added to OUT. | ||
641 | + Returns the number of bytes consumed from IN in CONSUMED. | ||
642 | + Always consumes at least one byte and displays at least one character. */ | ||
643 | + | ||
644 | +static unsigned int | ||
645 | +display_utf8 (const unsigned char * in, char * out, unsigned int * consumed) | ||
646 | +{ | ||
647 | + char * orig_out = out; | ||
648 | + unsigned int nchars = 0; | ||
649 | + unsigned int j; | ||
650 | + | ||
651 | + if (unicode_display == unicode_default) | ||
652 | + goto invalid; | ||
653 | + | ||
654 | + if (in[0] < 0xc0) | ||
655 | + goto invalid; | ||
656 | + | ||
657 | + if ((in[1] & 0xc0) != 0x80) | ||
658 | + goto invalid; | ||
659 | + | ||
660 | + if ((in[0] & 0x20) == 0) | ||
661 | + { | ||
662 | + nchars = 2; | ||
663 | + goto valid; | ||
664 | + } | ||
665 | + | ||
666 | + if ((in[2] & 0xc0) != 0x80) | ||
667 | + goto invalid; | ||
668 | + | ||
669 | + if ((in[0] & 0x10) == 0) | ||
670 | + { | ||
671 | + nchars = 3; | ||
672 | + goto valid; | ||
673 | + } | ||
674 | + | ||
675 | + if ((in[3] & 0xc0) != 0x80) | ||
676 | + goto invalid; | ||
677 | + | ||
678 | + nchars = 4; | ||
679 | + | ||
680 | + valid: | ||
681 | + switch (unicode_display) | ||
682 | + { | ||
683 | + case unicode_locale: | ||
684 | + /* Copy the bytes into the output buffer as is. */ | ||
685 | + memcpy (out, in, nchars); | ||
686 | + out += nchars; | ||
687 | + break; | ||
688 | + | ||
689 | + case unicode_invalid: | ||
690 | + case unicode_hex: | ||
691 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '<' : '{'); | ||
692 | + out += sprintf (out, "0x"); | ||
693 | + for (j = 0; j < nchars; j++) | ||
694 | + out += sprintf (out, "%02x", in [j]); | ||
695 | + out += sprintf (out, "%c", unicode_display == unicode_hex ? '>' : '}'); | ||
696 | + break; | ||
697 | + | ||
698 | + case unicode_highlight: | ||
699 | + if (isatty (1)) | ||
700 | + out += sprintf (out, "\x1B[31;47m"); /* Red. */ | ||
701 | + /* Fall through. */ | ||
702 | + case unicode_escape: | ||
703 | + switch (nchars) | ||
704 | + { | ||
705 | + case 2: | ||
706 | + out += sprintf (out, "\\u%02x%02x", | ||
707 | + ((in[0] & 0x1c) >> 2), | ||
708 | + ((in[0] & 0x03) << 6) | (in[1] & 0x3f)); | ||
709 | + break; | ||
710 | + | ||
711 | + case 3: | ||
712 | + out += sprintf (out, "\\u%02x%02x", | ||
713 | + ((in[0] & 0x0f) << 4) | ((in[1] & 0x3c) >> 2), | ||
714 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3f))); | ||
715 | + break; | ||
716 | + | ||
717 | + case 4: | ||
718 | + out += sprintf (out, "\\u%02x%02x%02x", | ||
719 | + ((in[0] & 0x07) << 6) | ((in[1] & 0x3c) >> 2), | ||
720 | + ((in[1] & 0x03) << 6) | ((in[2] & 0x3c) >> 2), | ||
721 | + ((in[2] & 0x03) << 6) | ((in[3] & 0x3f))); | ||
722 | + break; | ||
723 | + default: | ||
724 | + /* URG. */ | ||
725 | + break; | ||
726 | + } | ||
727 | + | ||
728 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
729 | + out += sprintf (out, "\033[0m"); /* Default colour. */ | ||
730 | + break; | ||
731 | + | ||
732 | + default: | ||
733 | + /* URG */ | ||
734 | + break; | ||
735 | + } | ||
736 | + | ||
737 | + * consumed = nchars; | ||
738 | + return out - orig_out; | ||
739 | + | ||
740 | + invalid: | ||
741 | + /* Not a valid UTF-8 sequence. */ | ||
742 | + *out = *in; | ||
743 | + * consumed = 1; | ||
744 | + return 1; | ||
745 | +} | ||
746 | + | ||
747 | /* Returns a version of IN with any control characters | ||
748 | replaced by escape sequences. Uses a static buffer | ||
749 | - if necessary. */ | ||
750 | + if necessary. | ||
751 | + | ||
752 | + If unicode display is enabled, then also handles the | ||
753 | + conversion of unicode characters. */ | ||
754 | |||
755 | static const char * | ||
756 | sanitize_string (const char * in) | ||
757 | @@ -508,40 +636,50 @@ sanitize_string (const char * in) | ||
758 | of cases it will not be needed. */ | ||
759 | do | ||
760 | { | ||
761 | - char c = *in++; | ||
762 | + unsigned char c = *in++; | ||
763 | |||
764 | if (c == 0) | ||
765 | return original; | ||
766 | |||
767 | if (ISCNTRL (c)) | ||
768 | break; | ||
769 | + | ||
770 | + if (unicode_display != unicode_default && c >= 0xc0) | ||
771 | + break; | ||
772 | } | ||
773 | while (1); | ||
774 | |||
775 | /* Copy the input, translating as needed. */ | ||
776 | in = original; | ||
777 | - if (buffer_len < (strlen (in) * 2)) | ||
778 | + if (buffer_len < (strlen (in) * 9)) | ||
779 | { | ||
780 | free ((void *) buffer); | ||
781 | - buffer_len = strlen (in) * 2; | ||
782 | + buffer_len = strlen (in) * 9; | ||
783 | buffer = xmalloc (buffer_len + 1); | ||
784 | } | ||
785 | |||
786 | out = buffer; | ||
787 | do | ||
788 | { | ||
789 | - char c = *in++; | ||
790 | + unsigned char c = *in++; | ||
791 | |||
792 | if (c == 0) | ||
793 | break; | ||
794 | |||
795 | - if (!ISCNTRL (c)) | ||
796 | - *out++ = c; | ||
797 | - else | ||
798 | + if (ISCNTRL (c)) | ||
799 | { | ||
800 | *out++ = '^'; | ||
801 | *out++ = c + 0x40; | ||
802 | } | ||
803 | + else if (unicode_display != unicode_default && c >= 0xc0) | ||
804 | + { | ||
805 | + unsigned int num_consumed; | ||
806 | + | ||
807 | + out += display_utf8 ((const unsigned char *)(in - 1), out, & num_consumed); | ||
808 | + in += num_consumed - 1; | ||
809 | + } | ||
810 | + else | ||
811 | + *out++ = c; | ||
812 | } | ||
813 | while (1); | ||
814 | |||
815 | @@ -4529,6 +4667,24 @@ dump_symbols (bfd *abfd ATTRIBUTE_UNUSED | ||
816 | free (alloc); | ||
817 | } | ||
818 | } | ||
819 | + else if (unicode_display != unicode_default | ||
820 | + && name != NULL && *name != '\0') | ||
821 | + { | ||
822 | + const char * sanitized_name; | ||
823 | + | ||
824 | + /* If we want to sanitize the name, we do it here, and | ||
825 | + temporarily clobber it while calling bfd_print_symbol. | ||
826 | + FIXME: This is a gross hack. */ | ||
827 | + sanitized_name = sanitize_string (name); | ||
828 | + if (sanitized_name != name) | ||
829 | + (*current)->name = sanitized_name; | ||
830 | + else | ||
831 | + sanitized_name = NULL; | ||
832 | + bfd_print_symbol (cur_bfd, stdout, *current, | ||
833 | + bfd_print_symbol_all); | ||
834 | + if (sanitized_name != NULL) | ||
835 | + (*current)->name = name; | ||
836 | + } | ||
837 | else | ||
838 | bfd_print_symbol (cur_bfd, stdout, *current, | ||
839 | bfd_print_symbol_all); | ||
840 | @@ -5212,7 +5368,7 @@ main (int argc, char **argv) | ||
841 | set_default_bfd_target (); | ||
842 | |||
843 | while ((c = getopt_long (argc, argv, | ||
844 | - "pP:ib:m:M:VvCdDlfFaHhrRtTxsSI:j:wE:zgeGW::", | ||
845 | + "CDE:FGHI:LM:P:RSTU:VW::ab:defghij:lm:prstvwxz", | ||
846 | long_options, (int *) 0)) | ||
847 | != EOF) | ||
848 | { | ||
849 | @@ -5495,6 +5651,23 @@ main (int argc, char **argv) | ||
850 | seenflag = true; | ||
851 | break; | ||
852 | |||
853 | + case 'U': | ||
854 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
855 | + unicode_display = unicode_default; | ||
856 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
857 | + unicode_display = unicode_locale; | ||
858 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
859 | + unicode_display = unicode_escape; | ||
860 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
861 | + unicode_display = unicode_invalid; | ||
862 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
863 | + unicode_display = unicode_hex; | ||
864 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
865 | + unicode_display = unicode_highlight; | ||
866 | + else | ||
867 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
868 | + break; | ||
869 | + | ||
870 | case 'H': | ||
871 | usage (stdout, 0); | ||
872 | /* No need to set seenflag or to break - usage() does not return. */ | ||
873 | diff --git a/binutils/readelf.c b/binutils/readelf.c | ||
874 | --- a/binutils/readelf.c 2021-12-19 19:00:27.058540065 -0800 | ||
875 | +++ b/binutils/readelf.c 2021-12-19 19:27:56.538354462 -0800 | ||
876 | @@ -328,6 +328,19 @@ typedef enum print_mode | ||
877 | } | ||
878 | print_mode; | ||
879 | |||
/* The ways in which UTF-8 multibyte sequences can be displayed.
   Selected with the -U/--unicode command line option.  */
enum unicode_display_type
{
  unicode_default   = 0,	/* No special treatment.  */
  unicode_locale    = 1,	/* As determined by the current locale.  */
  unicode_escape    = 2,	/* As \u escape sequences.  */
  unicode_hex       = 3,	/* As <hex sequences>.  */
  unicode_highlight = 4,	/* As highlighted escape sequences.  */
  unicode_invalid   = 5		/* Treated as invalid: {hex sequences}.  */
};

typedef enum unicode_display_type unicode_display_type;

/* The display method currently in force.  */
static unicode_display_type unicode_display = unicode_default;
891 | + | ||
892 | + | ||
893 | /* Versioned symbol info. */ | ||
894 | enum versioned_symbol_info | ||
895 | { | ||
896 | @@ -632,11 +645,18 @@ print_symbol (signed int width, const ch | ||
897 | if (c == 0) | ||
898 | break; | ||
899 | |||
900 | - /* Do not print control characters directly as they can affect terminal | ||
901 | - settings. Such characters usually appear in the names generated | ||
902 | - by the assembler for local labels. */ | ||
903 | - if (ISCNTRL (c)) | ||
904 | + if (ISPRINT (c)) | ||
905 | + { | ||
906 | + putchar (c); | ||
907 | + width_remaining --; | ||
908 | + num_printed ++; | ||
909 | + } | ||
910 | + else if (ISCNTRL (c)) | ||
911 | { | ||
912 | + /* Do not print control characters directly as they can affect terminal | ||
913 | + settings. Such characters usually appear in the names generated | ||
914 | + by the assembler for local labels. */ | ||
915 | + | ||
916 | if (width_remaining < 2) | ||
917 | break; | ||
918 | |||
919 | @@ -644,11 +664,137 @@ print_symbol (signed int width, const ch | ||
920 | width_remaining -= 2; | ||
921 | num_printed += 2; | ||
922 | } | ||
923 | - else if (ISPRINT (c)) | ||
924 | + else if (c == 0x7f) | ||
925 | { | ||
926 | - putchar (c); | ||
927 | - width_remaining --; | ||
928 | - num_printed ++; | ||
929 | + if (width_remaining < 5) | ||
930 | + break; | ||
931 | + printf ("<DEL>"); | ||
932 | + width_remaining -= 5; | ||
933 | + num_printed += 5; | ||
934 | + } | ||
935 | + else if (unicode_display != unicode_locale | ||
936 | + && unicode_display != unicode_default) | ||
937 | + { | ||
938 | + /* Display unicode characters as something else. */ | ||
939 | + unsigned char bytes[4]; | ||
940 | + bool is_utf8; | ||
941 | + unsigned int nbytes; | ||
942 | + | ||
943 | + bytes[0] = c; | ||
944 | + | ||
945 | + if (bytes[0] < 0xc0) | ||
946 | + { | ||
947 | + nbytes = 1; | ||
948 | + is_utf8 = false; | ||
949 | + } | ||
950 | + else | ||
951 | + { | ||
952 | + bytes[1] = *symbol++; | ||
953 | + | ||
954 | + if ((bytes[1] & 0xc0) != 0x80) | ||
955 | + { | ||
956 | + is_utf8 = false; | ||
957 | + /* Do not consume this character. It may only | ||
958 | + be the first byte in the sequence that was | ||
959 | + corrupt. */ | ||
960 | + --symbol; | ||
961 | + nbytes = 1; | ||
962 | + } | ||
963 | + else if ((bytes[0] & 0x20) == 0) | ||
964 | + { | ||
965 | + is_utf8 = true; | ||
966 | + nbytes = 2; | ||
967 | + } | ||
968 | + else | ||
969 | + { | ||
970 | + bytes[2] = *symbol++; | ||
971 | + | ||
972 | + if ((bytes[2] & 0xc0) != 0x80) | ||
973 | + { | ||
974 | + is_utf8 = false; | ||
975 | + symbol -= 2; | ||
976 | + nbytes = 1; | ||
977 | + } | ||
978 | + else if ((bytes[0] & 0x10) == 0) | ||
979 | + { | ||
980 | + is_utf8 = true; | ||
981 | + nbytes = 3; | ||
982 | + } | ||
983 | + else | ||
984 | + { | ||
985 | + bytes[3] = *symbol++; | ||
986 | + | ||
987 | + nbytes = 4; | ||
988 | + | ||
989 | + if ((bytes[3] & 0xc0) != 0x80) | ||
990 | + { | ||
991 | + is_utf8 = false; | ||
992 | + symbol -= 3; | ||
993 | + nbytes = 1; | ||
994 | + } | ||
995 | + else | ||
996 | + is_utf8 = true; | ||
997 | + } | ||
998 | + } | ||
999 | + } | ||
1000 | + | ||
1001 | + if (unicode_display == unicode_invalid) | ||
1002 | + is_utf8 = false; | ||
1003 | + | ||
1004 | + if (unicode_display == unicode_hex || ! is_utf8) | ||
1005 | + { | ||
1006 | + unsigned int i; | ||
1007 | + | ||
1008 | + if (width_remaining < (nbytes * 2) + 2) | ||
1009 | + break; | ||
1010 | + | ||
1011 | + putchar (is_utf8 ? '<' : '{'); | ||
1012 | + printf ("0x"); | ||
1013 | + for (i = 0; i < nbytes; i++) | ||
1014 | + printf ("%02x", bytes[i]); | ||
1015 | + putchar (is_utf8 ? '>' : '}'); | ||
1016 | + } | ||
1017 | + else | ||
1018 | + { | ||
1019 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1020 | + printf ("\x1B[31;47m"); /* Red. */ | ||
1021 | + | ||
1022 | + switch (nbytes) | ||
1023 | + { | ||
1024 | + case 2: | ||
1025 | + if (width_remaining < 6) | ||
1026 | + break; | ||
1027 | + printf ("\\u%02x%02x", | ||
1028 | + (bytes[0] & 0x1c) >> 2, | ||
1029 | + ((bytes[0] & 0x03) << 6) | (bytes[1] & 0x3f)); | ||
1030 | + break; | ||
1031 | + case 3: | ||
1032 | + if (width_remaining < 6) | ||
1033 | + break; | ||
1034 | + printf ("\\u%02x%02x", | ||
1035 | + ((bytes[0] & 0x0f) << 4) | ((bytes[1] & 0x3c) >> 2), | ||
1036 | + ((bytes[1] & 0x03) << 6) | (bytes[2] & 0x3f)); | ||
1037 | + break; | ||
1038 | + case 4: | ||
1039 | + if (width_remaining < 8) | ||
1040 | + break; | ||
1041 | + printf ("\\u%02x%02x%02x", | ||
1042 | + ((bytes[0] & 0x07) << 6) | ((bytes[1] & 0x3c) >> 2), | ||
1043 | + ((bytes[1] & 0x03) << 6) | ((bytes[2] & 0x3c) >> 2), | ||
1044 | + ((bytes[2] & 0x03) << 6) | (bytes[3] & 0x3f)); | ||
1045 | + | ||
1046 | + break; | ||
1047 | + default: | ||
1048 | + /* URG. */ | ||
1049 | + break; | ||
1050 | + } | ||
1051 | + | ||
1052 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1053 | + printf ("\033[0m"); /* Default colour. */ | ||
1054 | + } | ||
1055 | + | ||
1056 | + if (bytes[nbytes - 1] == 0) | ||
1057 | + break; | ||
1058 | } | ||
1059 | else | ||
1060 | { | ||
1061 | @@ -4668,6 +4814,7 @@ static struct option options[] = | ||
1062 | {"syms", no_argument, 0, 's'}, | ||
1063 | {"silent-truncation",no_argument, 0, 'T'}, | ||
1064 | {"section-details", no_argument, 0, 't'}, | ||
1065 | + {"unicode", required_argument, NULL, 'U'}, | ||
1066 | {"unwind", no_argument, 0, 'u'}, | ||
1067 | {"version-info", no_argument, 0, 'V'}, | ||
1068 | {"version", no_argument, 0, 'v'}, | ||
1069 | @@ -4744,6 +4891,12 @@ usage (FILE * stream) | ||
1070 | fprintf (stream, _("\ | ||
1071 | --no-recurse-limit Disable a demangling recursion limit\n")); | ||
1072 | fprintf (stream, _("\ | ||
1073 | + -U[dlexhi] --unicode=[default|locale|escape|hex|highlight|invalid]\n\ | ||
1074 | + Display unicode characters as determined by the current locale\n\ | ||
1075 | + (default), escape sequences, \"<hex sequences>\", highlighted\n\ | ||
1076 | + escape sequences, or treat them as invalid and display as\n\ | ||
1077 | + \"{hex sequences}\"\n")); | ||
1078 | + fprintf (stream, _("\ | ||
1079 | -n --notes Display the core notes (if present)\n")); | ||
1080 | fprintf (stream, _("\ | ||
1081 | -r --relocs Display the relocations (if present)\n")); | ||
1082 | @@ -4928,7 +5081,7 @@ parse_args (struct dump_data *dumpdata, | ||
1083 | usage (stderr); | ||
1084 | |||
1085 | while ((c = getopt_long | ||
1086 | - (argc, argv, "ACDHILNPR:STVWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF) | ||
1087 | + (argc, argv, "ACDHILNPR:STU:VWacdeghi:lnp:rstuvw::x:z", options, NULL)) != EOF) | ||
1088 | { | ||
1089 | switch (c) | ||
1090 | { | ||
1091 | @@ -5130,6 +5283,25 @@ parse_args (struct dump_data *dumpdata, | ||
1092 | /* Ignored for backward compatibility. */ | ||
1093 | break; | ||
1094 | |||
1095 | + case 'U': | ||
1096 | + if (optarg == NULL) | ||
1097 | + error (_("Missing arg to -U/--unicode")); /* Can this happen ? */ | ||
1098 | + else if (streq (optarg, "default") || streq (optarg, "d")) | ||
1099 | + unicode_display = unicode_default; | ||
1100 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
1101 | + unicode_display = unicode_locale; | ||
1102 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
1103 | + unicode_display = unicode_escape; | ||
1104 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
1105 | + unicode_display = unicode_invalid; | ||
1106 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
1107 | + unicode_display = unicode_hex; | ||
1108 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
1109 | + unicode_display = unicode_highlight; | ||
1110 | + else | ||
1111 | + error (_("invalid argument to -U/--unicode: %s"), optarg); | ||
1112 | + break; | ||
1113 | + | ||
1114 | case OPTION_SYM_BASE: | ||
1115 | sym_base = 0; | ||
1116 | if (optarg != NULL) | ||
1117 | diff --git a/binutils/strings.c b/binutils/strings.c | ||
1118 | --- a/binutils/strings.c 2021-12-19 19:00:27.058540065 -0800 | ||
1119 | +++ b/binutils/strings.c 2021-12-19 19:48:26.205313218 -0800 | ||
1120 | @@ -55,6 +55,19 @@ | ||
1121 | -T {bfdname} | ||
1122 | Specify a non-default object file format. | ||
1123 | |||
1124 | + --unicode={default|locale|invalid|hex|escape|highlight} | ||
1125 | + -U {d|l|i|x|e|h} | ||
1126 | + Determine how to handle UTF-8 unicode characters. The default | ||
1127 | + is no special treatment. All other versions of this option | ||
1128 | + only apply if the encoding is valid and enabling the option | ||
1129 | + implies --encoding=S. | ||
1130 | + The 'locale' option displays the characters according to the | ||
1131 | + current locale. The 'invalid' option treats them as | ||
1132 | + non-string characters. The 'hex' option displays them as hex | ||
1133 | + byte sequences. The 'escape' option displays them as escape | ||
1134 | + sequences and the 'highlight' option displays them as | ||
1135 | + coloured escape sequences. | ||
1136 | + | ||
1137 | --output-separator=sep_string | ||
1138 | -s sep_string String used to separate parsed strings in output. | ||
1139 | Default is newline. | ||
1140 | @@ -76,6 +89,22 @@ | ||
1141 | #include "safe-ctype.h" | ||
1142 | #include "bucomm.h" | ||
1143 | |||
/* True if the NUL terminated strings A and B have identical contents.
   Only defined here if nothing else has already provided it.  */
#if !defined (streq)
# define streq(a, b) (! strcmp ((a), (b)))
#endif
1147 | + | ||
/* How UTF-8 encoded characters are handled, as selected by the
   --unicode command line option.  */
enum unicode_display_type
{
  unicode_default   = 0,	/* No special treatment.  */
  unicode_locale    = 1,	/* Display according to the current locale.  */
  unicode_escape    = 2,	/* Display as escape sequences.  */
  unicode_hex       = 3,	/* Display as hex byte sequences.  */
  unicode_highlight = 4,	/* Display as coloured escape sequences.  */
  unicode_invalid   = 5		/* Treat as non-string characters.  */
};

typedef enum unicode_display_type unicode_display_type;

/* The handling currently in force.  */
static unicode_display_type unicode_display = unicode_default;
1159 | + | ||
1160 | #define STRING_ISGRAPHIC(c) \ | ||
1161 | ( (c) >= 0 \ | ||
1162 | && (c) <= 255 \ | ||
1163 | @@ -94,7 +123,7 @@ extern int errno; | ||
1164 | static int address_radix; | ||
1165 | |||
1166 | /* Minimum length of sequence of graphic chars to trigger output. */ | ||
1167 | -static int string_min; | ||
1168 | +static unsigned int string_min; | ||
1169 | |||
1170 | /* Whether or not we include all whitespace as a graphic char. */ | ||
1171 | static bool include_all_whitespace; | ||
1172 | @@ -121,21 +150,22 @@ static char *output_separator; | ||
1173 | static struct option long_options[] = | ||
1174 | { | ||
1175 | {"all", no_argument, NULL, 'a'}, | ||
1176 | + {"bytes", required_argument, NULL, 'n'}, | ||
1177 | {"data", no_argument, NULL, 'd'}, | ||
1178 | + {"encoding", required_argument, NULL, 'e'}, | ||
1179 | + {"help", no_argument, NULL, 'h'}, | ||
1180 | + {"include-all-whitespace", no_argument, NULL, 'w'}, | ||
1181 | + {"output-separator", required_argument, NULL, 's'}, | ||
1182 | {"print-file-name", no_argument, NULL, 'f'}, | ||
1183 | - {"bytes", required_argument, NULL, 'n'}, | ||
1184 | {"radix", required_argument, NULL, 't'}, | ||
1185 | - {"include-all-whitespace", no_argument, NULL, 'w'}, | ||
1186 | - {"encoding", required_argument, NULL, 'e'}, | ||
1187 | {"target", required_argument, NULL, 'T'}, | ||
1188 | - {"output-separator", required_argument, NULL, 's'}, | ||
1189 | - {"help", no_argument, NULL, 'h'}, | ||
1190 | + {"unicode", required_argument, NULL, 'U'}, | ||
1191 | {"version", no_argument, NULL, 'v'}, | ||
1192 | {NULL, 0, NULL, 0} | ||
1193 | }; | ||
1194 | |||
1195 | static bool strings_file (char *); | ||
1196 | -static void print_strings (const char *, FILE *, file_ptr, int, int, char *); | ||
1197 | +static void print_strings (const char *, FILE *, file_ptr, int, char *); | ||
1198 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; | ||
1199 | |||
1200 | int main (int, char **); | ||
1201 | @@ -171,7 +201,7 @@ main (int argc, char **argv) | ||
1202 | encoding = 's'; | ||
1203 | output_separator = NULL; | ||
1204 | |||
1205 | - while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789", | ||
1206 | + while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789", | ||
1207 | long_options, (int *) 0)) != EOF) | ||
1208 | { | ||
1209 | switch (optc) | ||
1210 | @@ -244,6 +274,23 @@ main (int argc, char **argv) | ||
1211 | output_separator = optarg; | ||
1212 | break; | ||
1213 | |||
1214 | + case 'U': | ||
1215 | + if (streq (optarg, "default") || streq (optarg, "d")) | ||
1216 | + unicode_display = unicode_default; | ||
1217 | + else if (streq (optarg, "locale") || streq (optarg, "l")) | ||
1218 | + unicode_display = unicode_locale; | ||
1219 | + else if (streq (optarg, "escape") || streq (optarg, "e")) | ||
1220 | + unicode_display = unicode_escape; | ||
1221 | + else if (streq (optarg, "invalid") || streq (optarg, "i")) | ||
1222 | + unicode_display = unicode_invalid; | ||
1223 | + else if (streq (optarg, "hex") || streq (optarg, "x")) | ||
1224 | + unicode_display = unicode_hex; | ||
1225 | + else if (streq (optarg, "highlight") || streq (optarg, "h")) | ||
1226 | + unicode_display = unicode_highlight; | ||
1227 | + else | ||
1228 | + fatal (_("invalid argument to -U/--unicode: %s"), optarg); | ||
1229 | + break; | ||
1230 | + | ||
1231 | case 'V': | ||
1232 | case 'v': | ||
1233 | print_version ("strings"); | ||
1234 | @@ -258,6 +305,9 @@ main (int argc, char **argv) | ||
1235 | } | ||
1236 | } | ||
1237 | |||
1238 | + if (unicode_display != unicode_default) | ||
1239 | + encoding = 'S'; | ||
1240 | + | ||
1241 | if (numeric_opt != 0) | ||
1242 | { | ||
1243 | string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0); | ||
1244 | @@ -293,14 +343,14 @@ main (int argc, char **argv) | ||
1245 | { | ||
1246 | datasection_only = false; | ||
1247 | SET_BINARY (fileno (stdin)); | ||
1248 | - print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL); | ||
1249 | + print_strings ("{standard input}", stdin, 0, 0, (char *) NULL); | ||
1250 | files_given = true; | ||
1251 | } | ||
1252 | else | ||
1253 | { | ||
1254 | for (; optind < argc; ++optind) | ||
1255 | { | ||
1256 | - if (strcmp (argv[optind], "-") == 0) | ||
1257 | + if (streq (argv[optind], "-")) | ||
1258 | datasection_only = false; | ||
1259 | else | ||
1260 | { | ||
1261 | @@ -342,7 +392,7 @@ strings_a_section (bfd *abfd, asection * | ||
1262 | } | ||
1263 | |||
1264 | *got_a_section = true; | ||
1265 | - print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem); | ||
1266 | + print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem); | ||
1267 | free (mem); | ||
1268 | } | ||
1269 | |||
1270 | @@ -427,7 +477,7 @@ strings_file (char *file) | ||
1271 | return false; | ||
1272 | } | ||
1273 | |||
1274 | - print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0); | ||
1275 | + print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL); | ||
1276 | |||
1277 | if (fclose (stream) == EOF) | ||
1278 | { | ||
1279 | @@ -551,6 +601,626 @@ unget_part_char (long c, file_ptr *addre | ||
1280 | } | ||
1281 | } | ||
1282 | } | ||
1283 | + | ||
1284 | +static void | ||
1285 | +print_filename_and_address (const char * filename, file_ptr address) | ||
1286 | +{ | ||
1287 | + if (print_filenames) | ||
1288 | + printf ("%s: ", filename); | ||
1289 | + | ||
1290 | + if (! print_addresses) | ||
1291 | + return; | ||
1292 | + | ||
1293 | + switch (address_radix) | ||
1294 | + { | ||
1295 | + case 8: | ||
1296 | + if (sizeof (address) > sizeof (long)) | ||
1297 | + { | ||
1298 | +#ifndef __MSVCRT__ | ||
1299 | + printf ("%7llo ", (unsigned long long) address); | ||
1300 | +#else | ||
1301 | + printf ("%7I64o ", (unsigned long long) address); | ||
1302 | +#endif | ||
1303 | + } | ||
1304 | + else | ||
1305 | + printf ("%7lo ", (unsigned long) address); | ||
1306 | + break; | ||
1307 | + | ||
1308 | + case 10: | ||
1309 | + if (sizeof (address) > sizeof (long)) | ||
1310 | + { | ||
1311 | +#ifndef __MSVCRT__ | ||
1312 | + printf ("%7llu ", (unsigned long long) address); | ||
1313 | +#else | ||
1314 | + printf ("%7I64d ", (unsigned long long) address); | ||
1315 | +#endif | ||
1316 | + } | ||
1317 | + else | ||
1318 | + printf ("%7ld ", (long) address); | ||
1319 | + break; | ||
1320 | + | ||
1321 | + case 16: | ||
1322 | + if (sizeof (address) > sizeof (long)) | ||
1323 | + { | ||
1324 | +#ifndef __MSVCRT__ | ||
1325 | + printf ("%7llx ", (unsigned long long) address); | ||
1326 | +#else | ||
1327 | + printf ("%7I64x ", (unsigned long long) address); | ||
1328 | +#endif | ||
1329 | + } | ||
1330 | + else | ||
1331 | + printf ("%7lx ", (unsigned long) address); | ||
1332 | + break; | ||
1333 | + } | ||
1334 | +} | ||
1335 | + | ||
/* Return non-zero if the bytes starting at BUFFER form a valid UTF-8 encoding.
   If the encoding is valid then returns the number of bytes it uses.

   BUFLEN is the number of bytes available at BUFFER; a sequence that
   would run past it is rejected, as is an empty buffer.  Only multibyte
   encodings (two to four bytes) are recognised, so plain ASCII bytes
   yield zero.  Lead bytes 0xf8-0xff are rejected as they can never
   appear in UTF-8.  Only the lead byte and the "10xxxxxx" shape of the
   continuation bytes are checked; overlong forms and surrogate code
   points are not detected.  */

static unsigned int
is_valid_utf8 (const unsigned char * buffer, unsigned long buflen)
{
  unsigned int len;
  unsigned int k;

  if (buflen == 0)
    return 0;

  /* A lead byte looks like 110xxxxx, 1110xxxx or 11110xxx.  */
  if (buffer[0] < 0xc0 || buffer[0] >= 0xf8)
    return 0;

  if ((buffer[0] & 0x20) == 0)
    len = 2;
  else if ((buffer[0] & 0x10) == 0)
    len = 3;
  else
    len = 4;

  if (buflen < len)
    return 0;

  /* Every byte after the first must be a continuation byte.  */
  for (k = 1; k < len; k++)
    if ((buffer[k] & 0xc0) != 0x80)
      return 0;

  return len;
}
1371 | + | ||
1372 | +/* Display a UTF-8 encoded character in BUFFER according to the setting | ||
1373 | + of unicode_display. The character is known to be valid. | ||
1374 | + Returns the number of bytes consumed. */ | ||
1375 | + | ||
1376 | +static unsigned int | ||
1377 | +display_utf8_char (const unsigned char * buffer) | ||
1378 | +{ | ||
1379 | + unsigned int j; | ||
1380 | + unsigned int utf8_len; | ||
1381 | + | ||
1382 | + switch (buffer[0] & 0x30) | ||
1383 | + { | ||
1384 | + case 0x00: | ||
1385 | + case 0x10: | ||
1386 | + utf8_len = 2; | ||
1387 | + break; | ||
1388 | + case 0x20: | ||
1389 | + utf8_len = 3; | ||
1390 | + break; | ||
1391 | + default: | ||
1392 | + utf8_len = 4; | ||
1393 | + } | ||
1394 | + | ||
1395 | + switch (unicode_display) | ||
1396 | + { | ||
1397 | + default: | ||
1398 | + fprintf (stderr, "ICE: unexpected unicode display type\n"); | ||
1399 | + break; | ||
1400 | + | ||
1401 | + case unicode_escape: | ||
1402 | + case unicode_highlight: | ||
1403 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1404 | + printf ("\x1B[31;47m"); /* Red. */ | ||
1405 | + | ||
1406 | + switch (utf8_len) | ||
1407 | + { | ||
1408 | + case 2: | ||
1409 | + printf ("\\u%02x%02x", | ||
1410 | + ((buffer[0] & 0x1c) >> 2), | ||
1411 | + ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f)); | ||
1412 | + break; | ||
1413 | + | ||
1414 | + case 3: | ||
1415 | + printf ("\\u%02x%02x", | ||
1416 | + ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2), | ||
1417 | + ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f))); | ||
1418 | + break; | ||
1419 | + | ||
1420 | + case 4: | ||
1421 | + printf ("\\u%02x%02x%02x", | ||
1422 | + ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2), | ||
1423 | + ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2), | ||
1424 | + ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f))); | ||
1425 | + break; | ||
1426 | + default: | ||
1427 | + /* URG. */ | ||
1428 | + break; | ||
1429 | + } | ||
1430 | + | ||
1431 | + if (unicode_display == unicode_highlight && isatty (1)) | ||
1432 | + printf ("\033[0m"); /* Default colour. */ | ||
1433 | + break; | ||
1434 | + | ||
1435 | + case unicode_hex: | ||
1436 | + putchar ('<'); | ||
1437 | + printf ("0x"); | ||
1438 | + for (j = 0; j < utf8_len; j++) | ||
1439 | + printf ("%02x", buffer [j]); | ||
1440 | + putchar ('>'); | ||
1441 | + break; | ||
1442 | + | ||
1443 | + case unicode_locale: | ||
1444 | + printf ("%.1s", buffer); | ||
1445 | + break; | ||
1446 | + } | ||
1447 | + | ||
1448 | + return utf8_len; | ||
1449 | +} | ||
1450 | + | ||
1451 | +/* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered | ||
1452 | + according to the setting of the unicode_display variable. The buffer | ||
1453 | + contains BUFLEN bytes. | ||
1454 | + | ||
1455 | + Display the characters as if they started at ADDRESS and are contained in | ||
1456 | + FILENAME. */ | ||
1457 | + | ||
1458 | +static void | ||
1459 | +print_unicode_buffer (const char * filename, | ||
1460 | + file_ptr address, | ||
1461 | + const unsigned char * buffer, | ||
1462 | + unsigned long buflen) | ||
1463 | +{ | ||
1464 | + /* Paranoia checks... */ | ||
1465 | + if (filename == NULL | ||
1466 | + || buffer == NULL | ||
1467 | + || unicode_display == unicode_default | ||
1468 | + || encoding != 'S' | ||
1469 | + || encoding_bytes != 1) | ||
1470 | + { | ||
1471 | + fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n"); | ||
1472 | + return; | ||
1473 | + } | ||
1474 | + | ||
1475 | + if (buflen == 0) | ||
1476 | + return; | ||
1477 | + | ||
1478 | + /* We must only display strings that are at least string_min *characters* | ||
1479 | + long. So we scan the buffer in two stages. First we locate the start | ||
1480 | + of a potential string. Then we walk along it until we have found | ||
1481 | + string_min characters. Then we go back to the start point and start | ||
1482 | + displaying characters according to the unicode_display setting. */ | ||
1483 | + | ||
1484 | + unsigned long start_point = 0; | ||
1485 | + unsigned long i = 0; | ||
1486 | + unsigned int char_len = 1; | ||
1487 | + unsigned int num_found = 0; | ||
1488 | + | ||
1489 | + for (i = 0; i < buflen; i += char_len) | ||
1490 | + { | ||
1491 | + int c = buffer[i]; | ||
1492 | + | ||
1493 | + char_len = 1; | ||
1494 | + | ||
1495 | + /* Find the first potential character of a string. */ | ||
1496 | + if (! STRING_ISGRAPHIC (c)) | ||
1497 | + { | ||
1498 | + num_found = 0; | ||
1499 | + continue; | ||
1500 | + } | ||
1501 | + | ||
1502 | + if (c > 126) | ||
1503 | + { | ||
1504 | + if (c < 0xc0) | ||
1505 | + { | ||
1506 | + num_found = 0; | ||
1507 | + continue; | ||
1508 | + } | ||
1509 | + | ||
1510 | + if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0) | ||
1511 | + { | ||
1512 | + char_len = 1; | ||
1513 | + num_found = 0; | ||
1514 | + continue; | ||
1515 | + } | ||
1516 | + | ||
1517 | + if (unicode_display == unicode_invalid) | ||
1518 | + { | ||
1519 | + /* We have found a valid UTF-8 character, but we treat it as non-graphic. */ | ||
1520 | + num_found = 0; | ||
1521 | + continue; | ||
1522 | + } | ||
1523 | + } | ||
1524 | + | ||
1525 | + if (num_found == 0) | ||
1526 | + /* We have found a potential starting point for a string. */ | ||
1527 | + start_point = i; | ||
1528 | + | ||
1529 | + ++ num_found; | ||
1530 | + | ||
1531 | + if (num_found >= string_min) | ||
1532 | + break; | ||
1533 | + } | ||
1534 | + | ||
1535 | + if (num_found < string_min) | ||
1536 | + return; | ||
1537 | + | ||
1538 | + print_filename_and_address (filename, address + start_point); | ||
1539 | + | ||
1540 | + /* We have found string_min characters. Display them and any | ||
1541 | + more that follow. */ | ||
1542 | + for (i = start_point; i < buflen; i += char_len) | ||
1543 | + { | ||
1544 | + int c = buffer[i]; | ||
1545 | + | ||
1546 | + char_len = 1; | ||
1547 | + | ||
1548 | + if (! STRING_ISGRAPHIC (c)) | ||
1549 | + break; | ||
1550 | + else if (c < 127) | ||
1551 | + putchar (c); | ||
1552 | + else if (! is_valid_utf8 (buffer + i, buflen - i)) | ||
1553 | + break; | ||
1554 | + else if (unicode_display == unicode_invalid) | ||
1555 | + break; | ||
1556 | + else | ||
1557 | + char_len = display_utf8_char (buffer + i); | ||
1558 | + } | ||
1559 | + | ||
1560 | + if (output_separator) | ||
1561 | + fputs (output_separator, stdout); | ||
1562 | + else | ||
1563 | + putchar ('\n'); | ||
1564 | + | ||
1565 | + /* FIXME: Using tail recursion here is lazy programming... */ | ||
1566 | + print_unicode_buffer (filename, address + i, buffer + i, buflen - i); | ||
1567 | +} | ||
1568 | + | ||
/* Fetch the next byte to be examined.

   Bytes that were previously pushed onto PUTBACK (*NUM_PUTBACK counts
   them) are handed back first, most recently pushed first.  Only when
   none are pending is STREAM read, in which case *NUM_READ is
   incremented - even when the read itself then yields EOF.

   Returns the byte, or the result of the stream read (EOF at end of
   input).  */

static int
get_unicode_byte (FILE *           stream,
                  unsigned char *  putback,
                  unsigned int *   num_putback,
                  unsigned int *   num_read)
{
  unsigned int pending = * num_putback;
  int byte;

  if (pending != 0)
    {
      /* Pop the top of the put-back stack.  */
      pending --;
      * num_putback = pending;
      byte = putback[pending];
      return byte;
    }

  /* Nothing pending: account for, then perform, a real read.  */
  ++ (* num_read);

#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
  byte = getc_unlocked (stream);
#else
  byte = getc (stream);
#endif
  return byte;
}
1589 | + | ||
1590 | +/* Helper function for print_unicode_stream. */ | ||
1591 | + | ||
1592 | +static void | ||
1593 | +print_unicode_stream_body (const char * filename, | ||
1594 | + file_ptr address, | ||
1595 | + FILE * stream, | ||
1596 | + unsigned char * putback_buf, | ||
1597 | + unsigned int num_putback, | ||
1598 | + unsigned char * print_buf) | ||
1599 | +{ | ||
1600 | + /* It would be nice if we could just read the stream into a buffer | ||
1601 | + and then process if with print_unicode_buffer. But the input | ||
1602 | + might be huge or it might time-locked (eg stdin). So instead | ||
1603 | + we go one byte at a time... */ | ||
1604 | + | ||
1605 | + file_ptr start_point = 0; | ||
1606 | + unsigned int num_read = 0; | ||
1607 | + unsigned int num_chars = 0; | ||
1608 | + unsigned int num_print = 0; | ||
1609 | + int c = 0; | ||
1610 | + | ||
1611 | + /* Find a series of string_min characters. Put them into print_buf. */ | ||
1612 | + do | ||
1613 | + { | ||
1614 | + if (num_chars >= string_min) | ||
1615 | + break; | ||
1616 | + | ||
1617 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1618 | + if (c == EOF) | ||
1619 | + break; | ||
1620 | + | ||
1621 | + if (! STRING_ISGRAPHIC (c)) | ||
1622 | + { | ||
1623 | + num_chars = num_print = 0; | ||
1624 | + continue; | ||
1625 | + } | ||
1626 | + | ||
1627 | + if (num_chars == 0) | ||
1628 | + start_point = num_read - 1; | ||
1629 | + | ||
1630 | + if (c < 127) | ||
1631 | + { | ||
1632 | + print_buf[num_print] = c; | ||
1633 | + num_chars ++; | ||
1634 | + num_print ++; | ||
1635 | + continue; | ||
1636 | + } | ||
1637 | + | ||
1638 | + if (c < 0xc0) | ||
1639 | + { | ||
1640 | + num_chars = num_print = 0; | ||
1641 | + continue; | ||
1642 | + } | ||
1643 | + | ||
1644 | + /* We *might* have a UTF-8 sequence. Time to start peeking. */ | ||
1645 | + char utf8[4]; | ||
1646 | + | ||
1647 | + utf8[0] = c; | ||
1648 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1649 | + if (c == EOF) | ||
1650 | + break; | ||
1651 | + utf8[1] = c; | ||
1652 | + | ||
1653 | + if ((utf8[1] & 0xc0) != 0x80) | ||
1654 | + { | ||
1655 | + /* Invalid UTF-8. */ | ||
1656 | + putback_buf[num_putback++] = utf8[1]; | ||
1657 | + num_chars = num_print = 0; | ||
1658 | + continue; | ||
1659 | + } | ||
1660 | + else if ((utf8[0] & 0x20) == 0) | ||
1661 | + { | ||
1662 | + /* A valid 2-byte UTF-8 encoding. */ | ||
1663 | + if (unicode_display == unicode_invalid) | ||
1664 | + { | ||
1665 | + putback_buf[num_putback++] = utf8[1]; | ||
1666 | + num_chars = num_print = 0; | ||
1667 | + } | ||
1668 | + else | ||
1669 | + { | ||
1670 | + print_buf[num_print ++] = utf8[0]; | ||
1671 | + print_buf[num_print ++] = utf8[1]; | ||
1672 | + num_chars ++; | ||
1673 | + } | ||
1674 | + continue; | ||
1675 | + } | ||
1676 | + | ||
1677 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1678 | + if (c == EOF) | ||
1679 | + break; | ||
1680 | + utf8[2] = c; | ||
1681 | + | ||
1682 | + if ((utf8[2] & 0xc0) != 0x80) | ||
1683 | + { | ||
1684 | + /* Invalid UTF-8. */ | ||
1685 | + putback_buf[num_putback++] = utf8[2]; | ||
1686 | + putback_buf[num_putback++] = utf8[1]; | ||
1687 | + num_chars = num_print = 0; | ||
1688 | + continue; | ||
1689 | + } | ||
1690 | + else if ((utf8[0] & 0x10) == 0) | ||
1691 | + { | ||
1692 | + /* A valid 3-byte UTF-8 encoding. */ | ||
1693 | + if (unicode_display == unicode_invalid) | ||
1694 | + { | ||
1695 | + putback_buf[num_putback++] = utf8[2]; | ||
1696 | + putback_buf[num_putback++] = utf8[1]; | ||
1697 | + num_chars = num_print = 0; | ||
1698 | + } | ||
1699 | + else | ||
1700 | + { | ||
1701 | + print_buf[num_print ++] = utf8[0]; | ||
1702 | + print_buf[num_print ++] = utf8[1]; | ||
1703 | + print_buf[num_print ++] = utf8[2]; | ||
1704 | + num_chars ++; | ||
1705 | + } | ||
1706 | + continue; | ||
1707 | + } | ||
1708 | + | ||
1709 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1710 | + if (c == EOF) | ||
1711 | + break; | ||
1712 | + utf8[3] = c; | ||
1713 | + | ||
1714 | + if ((utf8[3] & 0xc0) != 0x80) | ||
1715 | + { | ||
1716 | + /* Invalid UTF-8. */ | ||
1717 | + putback_buf[num_putback++] = utf8[3]; | ||
1718 | + putback_buf[num_putback++] = utf8[2]; | ||
1719 | + putback_buf[num_putback++] = utf8[1]; | ||
1720 | + num_chars = num_print = 0; | ||
1721 | + } | ||
1722 | + /* We have a valid 4-byte UTF-8 encoding. */ | ||
1723 | + else if (unicode_display == unicode_invalid) | ||
1724 | + { | ||
1725 | + putback_buf[num_putback++] = utf8[3]; | ||
1726 | + putback_buf[num_putback++] = utf8[1]; | ||
1727 | + putback_buf[num_putback++] = utf8[2]; | ||
1728 | + num_chars = num_print = 0; | ||
1729 | + } | ||
1730 | + else | ||
1731 | + { | ||
1732 | + print_buf[num_print ++] = utf8[0]; | ||
1733 | + print_buf[num_print ++] = utf8[1]; | ||
1734 | + print_buf[num_print ++] = utf8[2]; | ||
1735 | + print_buf[num_print ++] = utf8[3]; | ||
1736 | + num_chars ++; | ||
1737 | + } | ||
1738 | + } | ||
1739 | + while (1); | ||
1740 | + | ||
1741 | + if (num_chars >= string_min) | ||
1742 | + { | ||
1743 | + /* We know that we have string_min valid characters in print_buf, | ||
1744 | + and there may be more to come in the stream. Start displaying | ||
1745 | + them. */ | ||
1746 | + | ||
1747 | + print_filename_and_address (filename, address + start_point); | ||
1748 | + | ||
1749 | + unsigned int i; | ||
1750 | + for (i = 0; i < num_print;) | ||
1751 | + { | ||
1752 | + if (print_buf[i] < 127) | ||
1753 | + putchar (print_buf[i++]); | ||
1754 | + else | ||
1755 | + i += display_utf8_char (print_buf + i); | ||
1756 | + } | ||
1757 | + | ||
1758 | + /* OK so now we have to start read unchecked bytes. */ | ||
1759 | + | ||
1760 | + /* Find a series of string_min characters. Put them into print_buf. */ | ||
1761 | + do | ||
1762 | + { | ||
1763 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1764 | + if (c == EOF) | ||
1765 | + break; | ||
1766 | + | ||
1767 | + if (! STRING_ISGRAPHIC (c)) | ||
1768 | + break; | ||
1769 | + | ||
1770 | + if (c < 127) | ||
1771 | + { | ||
1772 | + putchar (c); | ||
1773 | + continue; | ||
1774 | + } | ||
1775 | + | ||
1776 | + if (c < 0xc0) | ||
1777 | + break; | ||
1778 | + | ||
1779 | + /* We *might* have a UTF-8 sequence. Time to start peeking. */ | ||
1780 | + unsigned char utf8[4]; | ||
1781 | + | ||
1782 | + utf8[0] = c; | ||
1783 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1784 | + if (c == EOF) | ||
1785 | + break; | ||
1786 | + utf8[1] = c; | ||
1787 | + | ||
1788 | + if ((utf8[1] & 0xc0) != 0x80) | ||
1789 | + { | ||
1790 | + /* Invalid UTF-8. */ | ||
1791 | + putback_buf[num_putback++] = utf8[1]; | ||
1792 | + break; | ||
1793 | + } | ||
1794 | + else if ((utf8[0] & 0x20) == 0) | ||
1795 | + { | ||
1796 | + /* Valid 2-byte UTF-8. */ | ||
1797 | + if (unicode_display == unicode_invalid) | ||
1798 | + { | ||
1799 | + putback_buf[num_putback++] = utf8[1]; | ||
1800 | + break; | ||
1801 | + } | ||
1802 | + else | ||
1803 | + { | ||
1804 | + (void) display_utf8_char (utf8); | ||
1805 | + continue; | ||
1806 | + } | ||
1807 | + } | ||
1808 | + | ||
1809 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1810 | + if (c == EOF) | ||
1811 | + break; | ||
1812 | + utf8[2] = c; | ||
1813 | + | ||
1814 | + if ((utf8[2] & 0xc0) != 0x80) | ||
1815 | + { | ||
1816 | + /* Invalid UTF-8. */ | ||
1817 | + putback_buf[num_putback++] = utf8[2]; | ||
1818 | + putback_buf[num_putback++] = utf8[1]; | ||
1819 | + break; | ||
1820 | + } | ||
1821 | + else if ((utf8[0] & 0x10) == 0) | ||
1822 | + { | ||
1823 | + /* Valid 3-byte UTF-8. */ | ||
1824 | + if (unicode_display == unicode_invalid) | ||
1825 | + { | ||
1826 | + putback_buf[num_putback++] = utf8[2]; | ||
1827 | + putback_buf[num_putback++] = utf8[1]; | ||
1828 | + break; | ||
1829 | + } | ||
1830 | + else | ||
1831 | + { | ||
1832 | + (void) display_utf8_char (utf8); | ||
1833 | + continue; | ||
1834 | + } | ||
1835 | + } | ||
1836 | + | ||
1837 | + c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | ||
1838 | + if (c == EOF) | ||
1839 | + break; | ||
1840 | + utf8[3] = c; | ||
1841 | + | ||
1842 | + if ((utf8[3] & 0xc0) != 0x80) | ||
1843 | + { | ||
1844 | + /* Invalid UTF-8. */ | ||
1845 | + putback_buf[num_putback++] = utf8[3]; | ||
1846 | + putback_buf[num_putback++] = utf8[2]; | ||
1847 | + putback_buf[num_putback++] = utf8[1]; | ||
1848 | + break; | ||
1849 | + } | ||
1850 | + else if (unicode_display == unicode_invalid) | ||
1851 | + { | ||
1852 | + putback_buf[num_putback++] = utf8[3]; | ||
1853 | + putback_buf[num_putback++] = utf8[2]; | ||
1854 | + putback_buf[num_putback++] = utf8[1]; | ||
1855 | + break; | ||
1856 | + } | ||
1857 | + else | ||
1858 | + /* A valid 4-byte UTF-8 encoding. */ | ||
1859 | + (void) display_utf8_char (utf8); | ||
1860 | + } | ||
1861 | + while (1); | ||
1862 | + | ||
1863 | + if (output_separator) | ||
1864 | + fputs (output_separator, stdout); | ||
1865 | + else | ||
1866 | + putchar ('\n'); | ||
1867 | + } | ||
1868 | + | ||
1869 | + if (c != EOF) | ||
1870 | + /* FIXME: Using tail recursion here is lazy, but it works. */ | ||
1871 | + print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf); | ||
1872 | +} | ||
1873 | + | ||
1874 | +/* Display strings read in from STREAM. Treat any UTF-8 encoded characters | ||
1875 | + encountered according to the setting of the unicode_display variable. | ||
1876 | + The stream is positioned at ADDRESS and is attached to FILENAME. */ | ||
1877 | + | ||
1878 | +static void | ||
1879 | +print_unicode_stream (const char * filename, | ||
1880 | + file_ptr address, | ||
1881 | + FILE * stream) | ||
1882 | +{ | ||
1883 | + /* Paranoia checks... */ | ||
1884 | + if (filename == NULL | ||
1885 | + || stream == NULL | ||
1886 | + || unicode_display == unicode_default | ||
1887 | + || encoding != 'S' | ||
1888 | + || encoding_bytes != 1) | ||
1889 | + { | ||
1890 | + fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n"); | ||
1891 | + return; | ||
1892 | + } | ||
1893 | + | ||
1894 | + /* Allocate space for string_min 4-byte utf-8 characters. */ | ||
1895 | + unsigned char * print_buf = xmalloc ((4 * string_min) + 1); | ||
1896 | + /* We should never have to put back more than 4 bytes. */ | ||
1897 | + unsigned char putback_buf[5]; | ||
1898 | + unsigned int num_putback = 0; | ||
1899 | + | ||
1900 | + print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf); | ||
1901 | + free (print_buf); | ||
1902 | +} | ||
1903 | |||
1904 | /* Find the strings in file FILENAME, read from STREAM. | ||
1905 | Assume that STREAM is positioned so that the next byte read | ||
1906 | @@ -566,20 +1236,29 @@ unget_part_char (long c, file_ptr *addre | ||
1907 | |||
1908 | static void | ||
1909 | print_strings (const char *filename, FILE *stream, file_ptr address, | ||
1910 | - int stop_point, int magiccount, char *magic) | ||
1911 | + int magiccount, char *magic) | ||
1912 | { | ||
1913 | + if (unicode_display != unicode_default) | ||
1914 | + { | ||
1915 | + if (magic != NULL) | ||
1916 | + print_unicode_buffer (filename, address, | ||
1917 | + (const unsigned char *) magic, magiccount); | ||
1918 | + | ||
1919 | + if (stream != NULL) | ||
1920 | + print_unicode_stream (filename, address, stream); | ||
1921 | + return; | ||
1922 | + } | ||
1923 | + | ||
1924 | char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1)); | ||
1925 | |||
1926 | while (1) | ||
1927 | { | ||
1928 | file_ptr start; | ||
1929 | - int i; | ||
1930 | + unsigned int i; | ||
1931 | long c; | ||
1932 | |||
1933 | /* See if the next `string_min' chars are all graphic chars. */ | ||
1934 | tryline: | ||
1935 | - if (stop_point && address >= stop_point) | ||
1936 | - break; | ||
1937 | start = address; | ||
1938 | for (i = 0; i < string_min; i++) | ||
1939 | { | ||
1940 | @@ -601,51 +1280,7 @@ print_strings (const char *filename, FIL | ||
1941 | |||
1942 | /* We found a run of `string_min' graphic characters. Print up | ||
1943 | to the next non-graphic character. */ | ||
1944 | - | ||
1945 | - if (print_filenames) | ||
1946 | - printf ("%s: ", filename); | ||
1947 | - if (print_addresses) | ||
1948 | - switch (address_radix) | ||
1949 | - { | ||
1950 | - case 8: | ||
1951 | - if (sizeof (start) > sizeof (long)) | ||
1952 | - { | ||
1953 | -#ifndef __MSVCRT__ | ||
1954 | - printf ("%7llo ", (unsigned long long) start); | ||
1955 | -#else | ||
1956 | - printf ("%7I64o ", (unsigned long long) start); | ||
1957 | -#endif | ||
1958 | - } | ||
1959 | - else | ||
1960 | - printf ("%7lo ", (unsigned long) start); | ||
1961 | - break; | ||
1962 | - | ||
1963 | - case 10: | ||
1964 | - if (sizeof (start) > sizeof (long)) | ||
1965 | - { | ||
1966 | -#ifndef __MSVCRT__ | ||
1967 | - printf ("%7llu ", (unsigned long long) start); | ||
1968 | -#else | ||
1969 | - printf ("%7I64d ", (unsigned long long) start); | ||
1970 | -#endif | ||
1971 | - } | ||
1972 | - else | ||
1973 | - printf ("%7ld ", (long) start); | ||
1974 | - break; | ||
1975 | - | ||
1976 | - case 16: | ||
1977 | - if (sizeof (start) > sizeof (long)) | ||
1978 | - { | ||
1979 | -#ifndef __MSVCRT__ | ||
1980 | - printf ("%7llx ", (unsigned long long) start); | ||
1981 | -#else | ||
1982 | - printf ("%7I64x ", (unsigned long long) start); | ||
1983 | -#endif | ||
1984 | - } | ||
1985 | - else | ||
1986 | - printf ("%7lx ", (unsigned long) start); | ||
1987 | - break; | ||
1988 | - } | ||
1989 | + print_filename_and_address (filename, start); | ||
1990 | |||
1991 | buf[i] = '\0'; | ||
1992 | fputs (buf, stdout); | ||
1993 | @@ -697,6 +1332,8 @@ usage (FILE *stream, int status) | ||
1994 | -T --target=<BFDNAME> Specify the binary file format\n\ | ||
1995 | -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\ | ||
1996 | s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\ | ||
1997 | + --unicode={default|show|invalid|hex|escape|highlight}\n\ | ||
1998 | + -u {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ | ||
1999 | -s --output-separator=<string> String used to separate strings in output.\n\ | ||
2000 | @<file> Read options from <file>\n\ | ||
2001 | -h --help Display this information\n\ | ||