summary | refs | log | tree | commit | diff | stats
path: root/meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch')
-rw-r--r-- meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch 1724
1 files changed, 1724 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch b/meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch
new file mode 100644
index 0000000000..6bfaf8402d
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc/0003-CVE-2021-42574.patch
@@ -0,0 +1,1724 @@
1From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001
2From: Marek Polacek <polacek@redhat.com>
3Date: Wed, 6 Oct 2021 14:33:59 -0400
4Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
5
6From a link below:
7"An issue was discovered in the Bidirectional Algorithm in the Unicode
8Specification through 14.0. It permits the visual reordering of
9characters via control sequences, which can be used to craft source code
10that renders different logic than the logical ordering of tokens
11ingested by compilers and interpreters. Adversaries can leverage this to
12encode source code for compilers accepting Unicode such that targeted
13vulnerabilities are introduced invisibly to human reviewers."
14
15More info:
16https://nvd.nist.gov/vuln/detail/CVE-2021-42574
17https://trojansource.codes/
18
19This is not a compiler bug. However, to mitigate the problem, this patch
20implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
21misleading Unicode bidirectional control characters the preprocessor may
22encounter.
23
24The default is =unpaired, which warns about improperly terminated
25bidirectional control characters; e.g. a LRE without its corresponding PDF.
26The level =any warns about any use of bidirectional control characters.
27
28This patch handles both UCNs and UTF-8 characters. UCNs designating
29bidi characters in identifiers are accepted since r204886. Then r217144
30enabled -fextended-identifiers by default. Extended characters in C/C++
31identifiers have been accepted since r275979. However, this patch still
32warns about mixing UTF-8 and UCN bidi characters; there seems to be no
33good reason to allow mixing them.
34
35We warn in different contexts: comments (both C and C++-style), string
36literals, character constants, and identifiers. Expectedly, UCNs are ignored
37in comments and raw string literals. The bidirectional control characters
38can nest so this patch handles that as well.
39
40I have not included nor tested this at all with Fortran (which also has
41string literals and line comments).
42
43Dave M. posted patches improving diagnostics involving Unicode characters.
44This patch does not make use of this new infrastructure yet.
45
46 PR preprocessor/103026
47
48gcc/c-family/ChangeLog:
49
50 * c.opt (Wbidi-chars, Wbidi-chars=): New option.
51
52gcc/ChangeLog:
53
54 * doc/invoke.texi: Document -Wbidi-chars.
55
56libcpp/ChangeLog:
57
58 * include/cpplib.h (enum cpp_bidirectional_level): New.
59 (struct cpp_options): Add cpp_warn_bidirectional.
60 (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
61 * internal.h (struct cpp_reader): Add warn_bidi_p member
62 function.
63 * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
64 * lex.c (bidi): New namespace.
65 (get_bidi_utf8): New function.
66 (get_bidi_ucn): Likewise.
67 (maybe_warn_bidi_on_close): Likewise.
68 (maybe_warn_bidi_on_char): Likewise.
69 (_cpp_skip_block_comment): Implement warning about bidirectional
70 control characters.
71 (skip_line_comment): Likewise.
72 (forms_identifier_p): Likewise.
73 (lex_identifier): Likewise.
74 (lex_string): Likewise.
75 (lex_raw_string): Likewise.
76
77gcc/testsuite/ChangeLog:
78
79 * c-c++-common/Wbidi-chars-1.c: New test.
80 * c-c++-common/Wbidi-chars-2.c: New test.
81 * c-c++-common/Wbidi-chars-3.c: New test.
82 * c-c++-common/Wbidi-chars-4.c: New test.
83 * c-c++-common/Wbidi-chars-5.c: New test.
84 * c-c++-common/Wbidi-chars-6.c: New test.
85 * c-c++-common/Wbidi-chars-7.c: New test.
86 * c-c++-common/Wbidi-chars-8.c: New test.
87 * c-c++-common/Wbidi-chars-9.c: New test.
88 * c-c++-common/Wbidi-chars-10.c: New test.
89 * c-c++-common/Wbidi-chars-11.c: New test.
90 * c-c++-common/Wbidi-chars-12.c: New test.
91 * c-c++-common/Wbidi-chars-13.c: New test.
92 * c-c++-common/Wbidi-chars-14.c: New test.
93 * c-c++-common/Wbidi-chars-15.c: New test.
94 * c-c++-common/Wbidi-chars-16.c: New test.
95 * c-c++-common/Wbidi-chars-17.c: New test.
96
97CVE: CVE-2021-42574
98Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628]
99Signed-off-by: Pgowda <pgowda.cve@gmail.com>
100
101---
102 gcc/c-family/c.opt | 24 ++
103 gcc/doc/invoke.texi | 21 +-
104 gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 +
105 gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++
106 gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 +
107 gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 +
108 gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 +
109 gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++
110 gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++
111 gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++
112 gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++
113 gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 +
114 gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 +
115 gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++
116 gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++
117 gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++
118 gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 +
119 gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 +
120 gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++
121 libcpp/include/cpplib.h | 18 +-
122 libcpp/init.c | 1 +
123 libcpp/internal.h | 7 +
124 libcpp/lex.c | 408 +++++++++++++++++++-
125 23 files changed, 1315 insertions(+), 7 deletions(-)
126 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c
127 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c
128 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c
129 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c
130 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c
131 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c
132 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c
133 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c
134 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c
135 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c
136 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c
137 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c
138 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c
139 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c
140 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c
141 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c
142 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c
143
144diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
145--- a/gcc/c-family/c.opt 2021-12-25 01:29:12.915317374 -0800
146+++ b/gcc/c-family/c.opt 2021-12-25 01:36:22.040018701 -0800
147@@ -350,6 +350,30 @@ Wbad-function-cast
148 C ObjC Var(warn_bad_function_cast) Warning
149 Warn about casting functions to incompatible types.
150
151+Wbidi-chars
152+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
153+;
154+
155+Wbidi-chars=
156+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
157+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
158+
159+; Required for these enum values.
160+SourceInclude
161+cpplib.h
162+
163+Enum
164+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
165+
166+EnumValue
167+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
168+
169+EnumValue
170+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
171+
172+EnumValue
173+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
174+
175 Wbool-compare
176 C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
177 Warn about boolean expression compared with an integer value different from true/false.
178diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
179--- a/gcc/doc/invoke.texi 2021-12-25 01:35:33.284883488 -0800
180+++ b/gcc/doc/invoke.texi 2021-12-25 01:36:22.048018559 -0800
181@@ -310,7 +310,9 @@ Objective-C and Objective-C++ Dialects}.
182 -Warith-conversion @gol
183 -Warray-bounds -Warray-bounds=@var{n} @gol
184 -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
185--Wno-attribute-warning -Wbool-compare -Wbool-operation @gol
186+-Wno-attribute-warning @gol
187+-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
188+-Wbool-compare -Wbool-operation @gol
189 -Wno-builtin-declaration-mismatch @gol
190 -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
191 -Wc11-c2x-compat @gol
192@@ -6860,6 +6862,23 @@ Attributes considered include @code{allo
193 This is the default. You can disable these warnings with either
194 @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
195
196+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
197+@opindex Wbidi-chars=
198+@opindex Wbidi-chars
199+@opindex Wno-bidi-chars
200+Warn about possibly misleading UTF-8 bidirectional control characters in
201+comments, string literals, character constants, and identifiers. Such
202+characters can change left-to-right writing direction into right-to-left
203+(and vice versa), which can cause confusion between the logical order and
204+visual order. This may be dangerous; for instance, it may seem that a piece
205+of code is not commented out, whereas it in fact is.
206+
207+There are three levels of warning supported by GCC@. The default is
208+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
209+bidi contexts. @option{-Wbidi-chars=none} turns the warning off.
210+@option{-Wbidi-chars=any} warns about any use of bidirectional control
211+characters.
212+
213 @item -Wbool-compare
214 @opindex Wno-bool-compare
215 @opindex Wbool-compare
216diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
217--- a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c 1969-12-31 16:00:00.000000000 -0800
218+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c 2021-12-25 01:36:22.048018559 -0800
219@@ -0,0 +1,27 @@
220+/* PR preprocessor/103026 */
221+/* { dg-do compile } */
222+/* { dg-options "-Wbidi-chars=unpaired" } */
223+/* More nesting testing. */
224+
225+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
226+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
227+int LRE_\u202a_PDF_\u202c;
228+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
229+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
230+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
231+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
232+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
233+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
234+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
235+int FSI_\u2068;
236+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
237+int FSI_\u2068_PDI_\u2069;
238+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
239+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
240+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
241+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
242+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
243+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
244+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
245+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
246+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
247diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
248--- a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c 1969-12-31 16:00:00.000000000 -0800
249+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c 2021-12-25 01:36:22.048018559 -0800
250@@ -0,0 +1,13 @@
251+/* PR preprocessor/103026 */
252+/* { dg-do compile } */
253+/* { dg-options "-Wbidi-chars=unpaired" } */
254+/* Test that we warn when mixing UCN and UTF-8. */
255+
256+int LRE_‪_PDF_\u202c;
257+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
258+int LRE_\u202a_PDF_‬_;
259+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
260+const char *s1 = "LRE_‪_PDF_\u202c";
261+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
262+const char *s2 = "LRE_\u202a_PDF_‬";
263+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
264diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
265--- a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c 1969-12-31 16:00:00.000000000 -0800
266+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c 2021-12-25 01:36:22.048018559 -0800
267@@ -0,0 +1,19 @@
268+/* PR preprocessor/103026 */
269+/* { dg-do compile { target { c || c++11 } } } */
270+/* { dg-options "-Wbidi-chars=any" } */
271+/* Test raw strings. */
272+
273+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
274+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
275+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
276+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
277+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
278+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
279+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
280+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
281+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
282+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
283+const char *s8 = R"(a b c PDI⁩ x y )z";
284+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
285+const char *s9 = R"(a b c PDF‬ x y z)";
286+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
287diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
288--- a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800
289+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-25 01:36:22.048018559 -0800
290@@ -0,0 +1,17 @@
291+/* PR preprocessor/103026 */
292+/* { dg-do compile { target { c || c++11 } } } */
293+/* { dg-options "-Wbidi-chars=unpaired" } */
294+/* Test raw strings. */
295+
296+const char *s1 = R"(a b c LRE‪ 1 2 3)";
297+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
298+const char *s2 = R"(a b c RLE‫ 1 2 3)";
299+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
300+const char *s3 = R"(a b c LRO‭ 1 2 3)";
301+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
302+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
303+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
304+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
305+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
306+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
307+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
308diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
309--- a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c 1969-12-31 16:00:00.000000000 -0800
310+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c 2021-12-25 01:36:22.048018559 -0800
311@@ -0,0 +1,38 @@
312+/* PR preprocessor/103026 */
313+/* { dg-do compile } */
314+/* { dg-options "-Wbidi-chars=unpaired" } */
315+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
316+ or RLOs. */
317+
318+/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/
319+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
320+// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩
321+// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩
322+// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩
323+// FSI_⁨_RLO_‮_PDI_⁩
324+// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩
325+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
326+
327+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
328+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
329+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
330+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
331+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
332+int PDI_\u2069;
333+int LRI_\u2066_PDI_\u2069;
334+int RLI_\u2067_PDI_\u2069;
335+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
336+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
337+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
338+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
339+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
340+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
341+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
342+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
343+int RLO_\u202e_PDI_\u2069;
344+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
345+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
346+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
347+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
348+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
349+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
350diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
351--- a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c 1969-12-31 16:00:00.000000000 -0800
352+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c 2021-12-25 01:36:22.048018559 -0800
353@@ -0,0 +1,59 @@
354+/* PR preprocessor/103026 */
355+/* { dg-do compile } */
356+/* { dg-options "-Wbidi-chars=unpaired" } */
357+/* Test unpaired bidi control chars in multiline comments. */
358+
359+/*
360+ * LRE‪ end
361+ */
362+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
363+/*
364+ * RLE‫ end
365+ */
366+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
367+/*
368+ * LRO‭ end
369+ */
370+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
371+/*
372+ * RLO‮ end
373+ */
374+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
375+/*
376+ * LRI⁦ end
377+ */
378+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
379+/*
380+ * RLI⁧ end
381+ */
382+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
383+/*
384+ * FSI⁨ end
385+ */
386+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
387+/* LRE‪
388+ PDF‬ */
389+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
390+/* FSI⁨
391+ PDI⁩ */
392+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
393+
394+/* LRE<‪>
395+ *
396+ */
397+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
398+
399+/*
400+ * LRE<‪>
401+ */
402+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
403+
404+/*
405+ *
406+ * LRE<‪> */
407+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
408+
409+/* RLI<⁧> */ /* PDI<⁩> */
410+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
411+/* LRE<‪> */ /* PDF<‬> */
412+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
413diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
414--- a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c 1969-12-31 16:00:00.000000000 -0800
415+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c 2021-12-25 01:36:22.048018559 -0800
416@@ -0,0 +1,26 @@
417+/* PR preprocessor/103026 */
418+/* { dg-do compile } */
419+/* { dg-options "-Wbidi-chars=any" } */
420+/* Test LTR/RTL chars. */
421+
422+/* LTR<‎> */
423+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
424+// LTR<‎>
425+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
426+/* RTL<‏> */
427+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
428+// RTL<‏>
429+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
430+
431+const char *s1 = "LTR<‎>";
432+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
433+const char *s2 = "LTR\u200e";
434+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
435+const char *s3 = "LTR\u200E";
436+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
437+const char *s4 = "RTL<‏>";
438+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
439+const char *s5 = "RTL\u200f";
440+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
441+const char *s6 = "RTL\u200F";
442+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
443diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
444--- a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c 1969-12-31 16:00:00.000000000 -0800
445+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c 2021-12-25 01:36:22.048018559 -0800
446@@ -0,0 +1,30 @@
447+/* PR preprocessor/103026 */
448+/* { dg-do compile } */
449+/* { dg-options "-Wbidi-chars=unpaired" } */
450+/* Test LTR/RTL chars. */
451+
452+/* LTR<‎> */
453+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
454+// LTR<‎>
455+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
456+/* RTL<‏> */
457+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
458+// RTL<‏>
459+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
460+int ltr_\u200e;
461+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
462+int rtl_\u200f;
463+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
464+
465+const char *s1 = "LTR<‎>";
466+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
467+const char *s2 = "LTR\u200e";
468+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
469+const char *s3 = "LTR\u200E";
470+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
471+const char *s4 = "RTL<‏>";
472+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
473+const char *s5 = "RTL\u200f";
474+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
475+const char *s6 = "RTL\u200F";
476+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
477diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
478--- a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c 1969-12-31 16:00:00.000000000 -0800
479+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c 2021-12-25 01:36:22.048018559 -0800
480@@ -0,0 +1,12 @@
481+/* PR preprocessor/103026 */
482+/* { dg-do compile } */
483+
484+int main() {
485+ int isAdmin = 0;
486+ /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
487+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
488+ __builtin_printf("You are an admin.\n");
489+ /* end admins only ‮ { ⁦*/
490+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
491+ return 0;
492+}
493diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
494--- a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c 1969-12-31 16:00:00.000000000 -0800
495+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c 2021-12-25 01:36:22.048018559 -0800
496@@ -0,0 +1,9 @@
497+/* PR preprocessor/103026 */
498+/* { dg-do compile } */
499+
500+int main() {
501+ /* Say hello; newline⁧/*/ return 0 ;
502+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
503+ __builtin_printf("Hello world.\n");
504+ return 0;
505+}
506diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
507--- a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c 1969-12-31 16:00:00.000000000 -0800
508+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c 2021-12-25 01:36:22.048018559 -0800
509@@ -0,0 +1,11 @@
510+/* PR preprocessor/103026 */
511+/* { dg-do compile } */
512+
513+int main() {
514+ const char* access_level = "user";
515+ if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
516+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
517+ __builtin_printf("You are an admin.\n");
518+ }
519+ return 0;
520+}
521diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
522--- a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c 1969-12-31 16:00:00.000000000 -0800
523+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c 2021-12-25 01:36:22.048018559 -0800
524@@ -0,0 +1,188 @@
525+/* PR preprocessor/103026 */
526+/* { dg-do compile } */
527+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
528+/* Test all bidi chars in various contexts (identifiers, comments,
529+ string literals, character constants), both UCN and UTF-8. The bidi
530+ chars here are properly terminated, except for the character constants. */
531+
532+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
533+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
534+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
535+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
536+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
537+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
538+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
539+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
540+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
541+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
542+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
543+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
544+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
545+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
546+
547+/* Same but C++ comments instead. */
548+// a b c LRE‪ 1 2 3 PDF‬ x y z
549+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
550+// a b c RLE‫ 1 2 3 PDF‬ x y z
551+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
552+// a b c LRO‭ 1 2 3 PDF‬ x y z
553+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
554+// a b c RLO‮ 1 2 3 PDF‬ x y z
555+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
556+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
557+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
558+// a b c RLI⁧ 1 2 3 PDI⁩ x y
559+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
560+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
561+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
562+
563+/* Here we're closing an unopened context, warn when =any. */
564+/* a b c PDI⁩ x y z */
565+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
566+/* a b c PDF‬ x y z */
567+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
568+// a b c PDI⁩ x y z
569+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
570+// a b c PDF‬ x y z
571+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
572+
573+/* Multiline comments. */
574+/* a b c PDI⁩ x y z
575+ */
576+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
577+/* a b c PDF‬ x y z
578+ */
579+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
580+/* first
581+ a b c PDI⁩ x y z
582+ */
583+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
584+/* first
585+ a b c PDF‬ x y z
586+ */
587+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
588+/* first
589+ a b c PDI⁩ x y z */
590+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
591+/* first
592+ a b c PDF‬ x y z */
593+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
594+
595+void
596+g1 ()
597+{
598+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
599+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
600+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
601+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
602+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
603+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
604+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
605+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
606+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
607+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
608+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
609+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
610+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
611+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
612+ const char *s8 = "a b c PDI⁩ x y z";
613+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
614+ const char *s9 = "a b c PDF‬ x y z";
615+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
616+
617+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
618+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
619+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
620+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
621+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
622+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
623+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
624+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
625+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
626+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
627+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
628+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
629+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
630+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
631+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
632+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
633+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
634+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
635+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
636+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
637+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
638+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
639+}
640+
641+void
642+g2 ()
643+{
644+ const char c1 = '\u202a';
645+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
646+ const char c2 = '\u202A';
647+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
648+ const char c3 = '\u202b';
649+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
650+ const char c4 = '\u202B';
651+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
652+ const char c5 = '\u202d';
653+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
654+ const char c6 = '\u202D';
655+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
656+ const char c7 = '\u202e';
657+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
658+ const char c8 = '\u202E';
659+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
660+ const char c9 = '\u2066';
661+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
662+ const char c10 = '\u2067';
663+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
664+ const char c11 = '\u2068';
665+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
666+}
667+
668+int a‪b‬c;
669+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
670+int a‫b‬c;
671+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
672+int a‭b‬c;
673+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
674+int a‮b‬c;
675+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
676+int a⁦b⁩c;
677+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
678+int a⁧b⁩c;
679+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
680+int a⁨b⁩c;
681+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
682+int A‬X;
683+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
684+int A\u202cY;
685+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
686+int A\u202CY2;
687+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
688+
689+int d\u202ae\u202cf;
690+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
691+int d\u202Ae\u202cf2;
692+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
693+int d\u202be\u202cf;
694+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
695+int d\u202Be\u202cf2;
696+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
697+int d\u202de\u202cf;
698+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
699+int d\u202De\u202cf2;
700+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
701+int d\u202ee\u202cf;
702+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
703+int d\u202Ee\u202cf2;
704+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
705+int d\u2066e\u2069f;
706+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
707+int d\u2067e\u2069f;
708+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
709+int d\u2068e\u2069f;
710+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
711+int X\u2069;
712+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
713diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
714--- a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c 1969-12-31 16:00:00.000000000 -0800
715+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c 2021-12-25 01:36:22.048018559 -0800
716@@ -0,0 +1,188 @@
717+/* PR preprocessor/103026 */
718+/* { dg-do compile } */
719+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
720+/* Test all bidi chars in various contexts (identifiers, comments,
721+ string literals, character constants), both UCN and UTF-8. The bidi
722+ chars here are properly terminated, except for the character constants. */
723+
724+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
725+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
726+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
727+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
728+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
729+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
730+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
731+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
732+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
733+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
734+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
735+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
736+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
737+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
738+
739+/* Same but C++ comments instead. */
740+// a b c LRE‪ 1 2 3 PDF‬ x y z
741+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
742+// a b c RLE‫ 1 2 3 PDF‬ x y z
743+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
744+// a b c LRO‭ 1 2 3 PDF‬ x y z
745+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
746+// a b c RLO‮ 1 2 3 PDF‬ x y z
747+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
748+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
749+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
750+// a b c RLI⁧ 1 2 3 PDI⁩ x y
751+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
752+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
753+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
754+
755+/* Here we're closing an unopened context, warn when =any. */
756+/* a b c PDI⁩ x y z */
757+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
758+/* a b c PDF‬ x y z */
759+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
760+// a b c PDI⁩ x y z
761+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
762+// a b c PDF‬ x y z
763+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
764+
765+/* Multiline comments. */
766+/* a b c PDI⁩ x y z
767+ */
768+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
769+/* a b c PDF‬ x y z
770+ */
771+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
772+/* first
773+ a b c PDI⁩ x y z
774+ */
775+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
776+/* first
777+ a b c PDF‬ x y z
778+ */
779+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
780+/* first
781+ a b c PDI⁩ x y z */
782+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
783+/* first
784+ a b c PDF‬ x y z */
785+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
786+
787+void
788+g1 ()
789+{
790+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
791+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
792+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
793+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
794+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
795+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
796+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
797+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
798+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
799+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
800+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
801+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
802+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
803+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
804+ const char *s8 = "a b c PDI⁩ x y z";
805+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
806+ const char *s9 = "a b c PDF‬ x y z";
807+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
808+
809+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
810+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
811+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
812+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
813+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
814+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
815+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
816+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
817+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
818+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
819+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
820+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
821+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
822+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
823+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
824+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
825+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
826+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
827+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
828+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
829+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
830+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
831+}
832+
833+void
834+g2 ()
835+{
836+ const char c1 = '\u202a';
837+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
838+ const char c2 = '\u202A';
839+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
840+ const char c3 = '\u202b';
841+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
842+ const char c4 = '\u202B';
843+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
844+ const char c5 = '\u202d';
845+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
846+ const char c6 = '\u202D';
847+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
848+ const char c7 = '\u202e';
849+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
850+ const char c8 = '\u202E';
851+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
852+ const char c9 = '\u2066';
853+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
854+ const char c10 = '\u2067';
855+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
856+ const char c11 = '\u2068';
857+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
858+}
859+
860+int a‪b‬c;
861+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
862+int a‫b‬c;
863+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
864+int a‭b‬c;
865+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
866+int a‮b‬c;
867+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
868+int a⁦b⁩c;
869+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
870+int a⁧b⁩c;
871+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
872+int a⁨b⁩c;
873+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
874+int A‬X;
875+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
876+int A\u202cY;
877+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
878+int A\u202CY2;
879+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
880+
881+int d\u202ae\u202cf;
882+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
883+int d\u202Ae\u202cf2;
884+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
885+int d\u202be\u202cf;
886+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
887+int d\u202Be\u202cf2;
888+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
889+int d\u202de\u202cf;
890+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
891+int d\u202De\u202cf2;
892+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
893+int d\u202ee\u202cf;
894+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
895+int d\u202Ee\u202cf2;
896+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
897+int d\u2066e\u2069f;
898+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
899+int d\u2067e\u2069f;
900+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
901+int d\u2068e\u2069f;
902+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
903+int X\u2069;
904+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
905diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
906--- a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c 1969-12-31 16:00:00.000000000 -0800
907+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c 2021-12-25 01:36:22.052018489 -0800
908@@ -0,0 +1,155 @@
909+/* PR preprocessor/103026 */
910+/* { dg-do compile } */
911+/* { dg-options "-Wbidi-chars=unpaired" } */
912+/* Test nesting of bidi chars in various contexts. */
913+
914+/* Terminated by the wrong char: */
915+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
916+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
917+/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/
918+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
919+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
920+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
921+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
922+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
923+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
924+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
925+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
926+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
927+/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/
928+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
929+
930+/* LRE‪ PDF‬ */
931+/* LRE‪ LRE‪ PDF‬ PDF‬ */
932+/* PDF‬ LRE‪ PDF‬ */
933+/* LRE‪ PDF‬ LRE‪ PDF‬ */
934+/* LRE‪ LRE‪ PDF‬ */
935+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
936+/* PDF‬ LRE‪ */
937+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
938+
939+// a b c LRE‪ 1 2 3 PDI⁩ x y z
940+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
941+// a b c RLE‫ 1 2 3 PDI⁩ x y z*/
942+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
943+// a b c LRO‭ 1 2 3 PDI⁩ x y z
944+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
945+// a b c RLO‮ 1 2 3 PDI⁩ x y z
946+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
947+// a b c LRI⁦ 1 2 3 PDF‬ x y z
948+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
949+// a b c RLI⁧ 1 2 3 PDF‬ x y z
950+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
951+// a b c FSI⁨ 1 2 3 PDF‬ x y z
952+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
953+
954+// LRE‪ PDF‬
955+// LRE‪ LRE‪ PDF‬ PDF‬
956+// PDF‬ LRE‪ PDF‬
957+// LRE‪ PDF‬ LRE‪ PDF‬
958+// LRE‪ LRE‪ PDF‬
959+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
960+// PDF‬ LRE‪
961+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
962+
963+void
964+g1 ()
965+{
966+ const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
967+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
968+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
969+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
970+ const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
971+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
972+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
973+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
974+ const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
975+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
976+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
977+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
978+ const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
979+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
980+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
981+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
982+ const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
983+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
984+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
985+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
986+ const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
987+ ";
988+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
989+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
990+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
991+ const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
992+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
993+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
994+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
995+ const char *s15 = "PDF‬ LRE‪";
996+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
997+ const char *s16 = "PDF\u202c LRE\u202a";
998+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
999+ const char *s17 = "LRE‪ PDF‬";
1000+ const char *s18 = "LRE\u202a PDF\u202c";
1001+ const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
1002+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
1003+ const char *s21 = "PDF‬ LRE‪ PDF‬";
1004+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
1005+ const char *s23 = "LRE‪ LRE‪ PDF‬";
1006+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1007+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
1008+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1009+ const char *s25 = "PDF‬ LRE‪";
1010+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1011+ const char *s26 = "PDF\u202c LRE\u202a";
1012+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1013+ const char *s27 = "PDF‬ LRE\u202a";
1014+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1015+ const char *s28 = "PDF\u202c LRE‪";
1016+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1017+}
1018+
1019+int aLRE‪bPDI⁩;
1020+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1021+int A\u202aB\u2069C;
1022+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1023+int aRLE‫bPDI⁩;
1024+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1025+int a\u202bB\u2069c;
1026+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1027+int aLRO‭bPDI⁩;
1028+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1029+int a\u202db\u2069c2;
1030+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1031+int aRLO‮bPDI⁩;
1032+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1033+int a\u202eb\u2069;
1034+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1035+int aLRI⁦bPDF‬;
1036+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1037+int a\u2066b\u202c;
1038+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1039+int aRLI⁧bPDF‬c
1040+;
1041+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
1042+int a\u2067b\u202c;
1043+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1044+int aFSI⁨bPDF‬;
1045+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1046+int a\u2068b\u202c;
1047+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1048+int aFSI⁨bPD\u202C;
1049+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1050+int aFSI\u2068bPDF‬_;
1051+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1052+int aLRE‪bPDF‬b;
1053+int A\u202aB\u202c;
1054+int a_LRE‪_LRE‪_b_PDF‬_PDF‬;
1055+int A\u202aA\u202aB\u202cB\u202c;
1056+int aPDF‬bLREadPDF‬;
1057+int a_\u202C_\u202a_\u202c;
1058+int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬;
1059+int a_\u202a_\u202c_\u202a_\u202c_;
1060+int a_LRE‪_b_PDF‬_c_LRE‪;
1061+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1062+int a_\u202a_\u202c_\u202a_;
1063+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1064diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
1065--- a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c 1969-12-31 16:00:00.000000000 -0800
1066+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c 2021-12-25 01:36:22.052018489 -0800
1067@@ -0,0 +1,9 @@
1068+/* PR preprocessor/103026 */
1069+/* { dg-do compile } */
1070+/* { dg-options "-Wbidi-chars=any" } */
1071+/* Test we ignore UCNs in comments. */
1072+
1073+// a b c \u202a 1 2 3
1074+// a b c \u202A 1 2 3
1075+/* a b c \u202a 1 2 3 */
1076+/* a b c \u202A 1 2 3 */
1077diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
1078--- a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c 1969-12-31 16:00:00.000000000 -0800
1079+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c 2021-12-25 01:36:22.052018489 -0800
1080@@ -0,0 +1,13 @@
1081+/* PR preprocessor/103026 */
1082+/* { dg-do compile } */
1083+/* { dg-options "-Wbidi-chars=any" } */
1084+/* Test \u vs \U. */
1085+
1086+int a_\u202A;
1087+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
1088+int a_\u202a_2;
1089+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
1090+int a_\U0000202A_3;
1091+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
1092+int a_\U0000202a_4;
1093+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
1094diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
1095--- a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c 1969-12-31 16:00:00.000000000 -0800
1096+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c 2021-12-25 01:36:22.052018489 -0800
1097@@ -0,0 +1,29 @@
1098+/* PR preprocessor/103026 */
1099+/* { dg-do compile } */
1100+/* { dg-options "-Wbidi-chars=unpaired" } */
1101+/* Test that we properly separate bidi contexts (comment/identifier/character
1102+ constant/string literal). */
1103+
1104+/* LRE ->‪<- */ int pdf_\u202c_1;
1105+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1106+/* RLE ->‫<- */ int pdf_\u202c_2;
1107+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1108+/* LRO ->‭<- */ int pdf_\u202c_3;
1109+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1110+/* RLO ->‮<- */ int pdf_\u202c_4;
1111+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1112+/* LRI ->⁦<-*/ int pdi_\u2069_1;
1113+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1114+/* RLI ->⁧<- */ int pdi_\u2069_12;
1115+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1116+/* FSI ->⁨<- */ int pdi_\u2069_3;
1117+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1118+
1119+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
1120+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1121+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
1122+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1123+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
1124+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1125+int lre_\u202a; const char *s4 = "PDF\u202c";
1126+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
1127diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
1128--- a/libcpp/include/cpplib.h 2021-12-25 01:35:33.288883417 -0800
1129+++ b/libcpp/include/cpplib.h 2021-12-25 01:36:22.052018489 -0800
1130@@ -308,6 +308,17 @@ enum cpp_normalize_level {
1131 normalized_none
1132 };
1133
1134+/* The possible bidirectional control characters checking levels, from least
1135+ restrictive to most. */
1136+enum cpp_bidirectional_level {
1137+ /* No checking. */
1138+ bidirectional_none,
1139+ /* Only detect unpaired uses of bidirectional control characters. */
1140+ bidirectional_unpaired,
1141+ /* Detect any use of bidirectional control characters. */
1142+ bidirectional_any
1143+};
1144+
1145 /* This structure is nested inside struct cpp_reader, and
1146 carries all the options visible to the command line. */
1147 struct cpp_options
1148@@ -515,6 +526,10 @@ struct cpp_options
1149 /* True if warn about differences between C++98 and C++11. */
1150 bool cpp_warn_cxx11_compat;
1151
1152+ /* Nonzero if bidirectional control characters checking is on. See enum
1153+ cpp_bidirectional_level. */
1154+ unsigned char cpp_warn_bidirectional;
1155+
1156 /* Dependency generation. */
1157 struct
1158 {
1159@@ -613,7 +628,8 @@ enum cpp_warning_reason {
1160 CPP_W_C90_C99_COMPAT,
1161 CPP_W_C11_C2X_COMPAT,
1162 CPP_W_CXX11_COMPAT,
1163- CPP_W_EXPANSION_TO_DEFINED
1164+ CPP_W_EXPANSION_TO_DEFINED,
1165+ CPP_W_BIDIRECTIONAL
1166 };
1167
1168 /* Callback for header lookup for HEADER, which is the name of a
1169diff --git a/libcpp/init.c b/libcpp/init.c
1170--- a/libcpp/init.c 2021-12-25 01:29:12.931317107 -0800
1171+++ b/libcpp/init.c 2021-12-25 01:36:22.052018489 -0800
1172@@ -215,6 +215,7 @@ cpp_create_reader (enum c_lang lang, cpp
1173 = ENABLE_CANONICAL_SYSTEM_HEADERS;
1174 CPP_OPTION (pfile, ext_numeric_literals) = 1;
1175 CPP_OPTION (pfile, warn_date_time) = 0;
1176+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
1177
1178 /* Default CPP arithmetic to something sensible for the host for the
1179 benefit of dumb users like fix-header. */
1180diff --git a/libcpp/internal.h b/libcpp/internal.h
1181--- a/libcpp/internal.h 2021-12-25 01:35:33.288883417 -0800
1182+++ b/libcpp/internal.h 2021-12-25 01:36:22.052018489 -0800
1183@@ -581,6 +581,10 @@ struct cpp_reader
1184 /* If non-zero, the lexer will use this location for the next token
1185 instead of getting a location from the linemap. */
1186 location_t forced_token_location;
1187+ bool warn_bidi_p () const
1188+ {
1189+ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
1190+ }
1191 };
1192
1193 /* Character classes. Based on the more primitive macros in safe-ctype.h.
1194diff --git a/libcpp/lex.c b/libcpp/lex.c
1195--- a/libcpp/lex.c 2021-12-25 01:35:33.288883417 -0800
1196+++ b/libcpp/lex.c 2021-12-25 01:36:22.052018489 -0800
1197@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi
1198 }
1199 }
1200
1201+namespace bidi {
1202+ enum class kind {
1203+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
1204+ };
1205+
1206+ /* All the UTF-8 encodings of bidi characters start with E2. */
1207+ constexpr uchar utf8_start = 0xe2;
1208+
1209+ /* A vector holding currently open bidi contexts. We use a char for
1210+ each context; its LSB is 1 if it represents a PDF context, 0 if it
1211+ represents a PDI context. The next bit is 1 if this context was opened
1212+ by a bidi character written as a UCN, and 0 when it was UTF-8. */
1213+ semi_embedded_vec <unsigned char, 16> vec;
1214+
1215+ /* Close the whole comment/identifier/string literal/character constant
1216+ context. */
1217+ void on_close ()
1218+ {
1219+ vec.truncate (0);
1220+ }
1221+
1222+ /* Pop the last element in the vector. */
1223+ void pop ()
1224+ {
1225+ unsigned int len = vec.count ();
1226+ gcc_checking_assert (len > 0);
1227+ vec.truncate (len - 1);
1228+ }
1229+
1230+ /* Return the context of the Ith element. */
1231+ kind ctx_at (unsigned int i)
1232+ {
1233+ return (vec[i] & 1) ? kind::PDF : kind::PDI;
1234+ }
1235+
1236+ /* Return which context is currently opened. */
1237+ kind current_ctx ()
1238+ {
1239+ unsigned int len = vec.count ();
1240+ if (len == 0)
1241+ return kind::NONE;
1242+ return ctx_at (len - 1);
1243+ }
1244+
1245+ /* Return true if the current context comes from a UCN origin, that is,
1246+ the bidi char which started this bidi context was written as a UCN. */
1247+ bool current_ctx_ucn_p ()
1248+ {
1249+ unsigned int len = vec.count ();
1250+ gcc_checking_assert (len > 0);
1251+ return (vec[len - 1] >> 1) & 1;
1252+ }
1253+
1254+ /* We've read a bidi char, update the current vector as necessary. */
1255+ void on_char (kind k, bool ucn_p)
1256+ {
1257+ switch (k)
1258+ {
1259+ case kind::LRE:
1260+ case kind::RLE:
1261+ case kind::LRO:
1262+ case kind::RLO:
1263+ vec.push (ucn_p ? 3u : 1u);
1264+ break;
1265+ case kind::LRI:
1266+ case kind::RLI:
1267+ case kind::FSI:
1268+ vec.push (ucn_p ? 2u : 0u);
1269+ break;
1270+ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
1271+ whose scope has not yet been terminated. */
1272+ case kind::PDF:
1273+ if (current_ctx () == kind::PDF)
1274+ pop ();
1275+ break;
1276+ /* PDI terminates the scope of the last LRI, RLI, or FSI whose
1277+ scope has not yet been terminated, as well as the scopes of
1278+ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
1279+ yet been terminated. */
1280+ case kind::PDI:
1281+ for (int i = vec.count () - 1; i >= 0; --i)
1282+ if (ctx_at (i) == kind::PDI)
1283+ {
1284+ vec.truncate (i);
1285+ break;
1286+ }
1287+ break;
1288+ case kind::LTR:
1289+ case kind::RTL:
1290+ /* These aren't popped by a PDF/PDI. */
1291+ break;
1292+ [[likely]] case kind::NONE:
1293+ break;
1294+ default:
1295+ abort ();
1296+ }
1297+ }
1298+
1299+ /* Return a descriptive string for K. */
1300+ const char *to_str (kind k)
1301+ {
1302+ switch (k)
1303+ {
1304+ case kind::LRE:
1305+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
1306+ case kind::RLE:
1307+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
1308+ case kind::LRO:
1309+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
1310+ case kind::RLO:
1311+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
1312+ case kind::LRI:
1313+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
1314+ case kind::RLI:
1315+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
1316+ case kind::FSI:
1317+ return "U+2068 (FIRST STRONG ISOLATE)";
1318+ case kind::PDF:
1319+ return "U+202C (POP DIRECTIONAL FORMATTING)";
1320+ case kind::PDI:
1321+ return "U+2069 (POP DIRECTIONAL ISOLATE)";
1322+ case kind::LTR:
1323+ return "U+200E (LEFT-TO-RIGHT MARK)";
1324+ case kind::RTL:
1325+ return "U+200F (RIGHT-TO-LEFT MARK)";
1326+ default:
1327+ abort ();
1328+ }
1329+ }
1330+}
1331+
1332+/* Parse a sequence of 3 bytes starting with P and return its bidi code. */
1333+
1334+static bidi::kind
1335+get_bidi_utf8 (const unsigned char *const p)
1336+{
1337+ gcc_checking_assert (p[0] == bidi::utf8_start);
1338+
1339+ if (p[1] == 0x80)
1340+ switch (p[2])
1341+ {
1342+ case 0xaa:
1343+ return bidi::kind::LRE;
1344+ case 0xab:
1345+ return bidi::kind::RLE;
1346+ case 0xac:
1347+ return bidi::kind::PDF;
1348+ case 0xad:
1349+ return bidi::kind::LRO;
1350+ case 0xae:
1351+ return bidi::kind::RLO;
1352+ case 0x8e:
1353+ return bidi::kind::LTR;
1354+ case 0x8f:
1355+ return bidi::kind::RTL;
1356+ default:
1357+ break;
1358+ }
1359+ else if (p[1] == 0x81)
1360+ switch (p[2])
1361+ {
1362+ case 0xa6:
1363+ return bidi::kind::LRI;
1364+ case 0xa7:
1365+ return bidi::kind::RLI;
1366+ case 0xa8:
1367+ return bidi::kind::FSI;
1368+ case 0xa9:
1369+ return bidi::kind::PDI;
1370+ default:
1371+ break;
1372+ }
1373+
1374+ return bidi::kind::NONE;
1375+}
1376+
1377+/* Parse a UCN where P points just past \u or \U and return its bidi code. */
1378+
1379+static bidi::kind
1380+get_bidi_ucn (const unsigned char *p, bool is_U)
1381+{
1382+ /* 6.4.3 Universal Character Names
1383+ \u hex-quad
1384+ \U hex-quad hex-quad
1385+ where \unnnn means \U0000nnnn. */
1386+
1387+ if (is_U)
1388+ {
1389+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
1390+ return bidi::kind::NONE;
1391+ /* Skip 4B so we can treat \u and \U the same below. */
1392+ p += 4;
1393+ }
1394+
1395+ /* All code points we are looking for start with 20xx. */
1396+ if (p[0] != '2' || p[1] != '0')
1397+ return bidi::kind::NONE;
1398+ else if (p[2] == '2')
1399+ switch (p[3])
1400+ {
1401+ case 'a':
1402+ case 'A':
1403+ return bidi::kind::LRE;
1404+ case 'b':
1405+ case 'B':
1406+ return bidi::kind::RLE;
1407+ case 'c':
1408+ case 'C':
1409+ return bidi::kind::PDF;
1410+ case 'd':
1411+ case 'D':
1412+ return bidi::kind::LRO;
1413+ case 'e':
1414+ case 'E':
1415+ return bidi::kind::RLO;
1416+ default:
1417+ break;
1418+ }
1419+ else if (p[2] == '6')
1420+ switch (p[3])
1421+ {
1422+ case '6':
1423+ return bidi::kind::LRI;
1424+ case '7':
1425+ return bidi::kind::RLI;
1426+ case '8':
1427+ return bidi::kind::FSI;
1428+ case '9':
1429+ return bidi::kind::PDI;
1430+ default:
1431+ break;
1432+ }
1433+ else if (p[2] == '0')
1434+ switch (p[3])
1435+ {
1436+ case 'e':
1437+ case 'E':
1438+ return bidi::kind::LTR;
1439+ case 'f':
1440+ case 'F':
1441+ return bidi::kind::RTL;
1442+ default:
1443+ break;
1444+ }
1445+
1446+ return bidi::kind::NONE;
1447+}
1448+
1449+/* We're closing a bidi context, that is, we've encountered a newline,
1450+ are closing a C-style comment, or are at the end of a string literal,
1451+ character constant, or identifier. Warn if this context was not
1452+ properly terminated by a PDI or PDF. P points to the last character
1453+ in this context. */
1454+
1455+static void
1456+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
1457+{
1458+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
1459+ && bidi::vec.count () > 0)
1460+ {
1461+ const location_t loc
1462+ = linemap_position_for_column (pfile->line_table,
1463+ CPP_BUF_COLUMN (pfile->buffer, p));
1464+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
1465+ "unpaired UTF-8 bidirectional control character "
1466+ "detected");
1467+ }
1468+ /* We're done with this context. */
1469+ bidi::on_close ();
1470+}
1471+
1472+/* We're at the beginning or in the middle of an identifier/comment/string
1473+ literal/character constant. Warn if we've encountered a bidi character.
1474+ KIND says which bidi character it was; P points to it in the character
1475+ stream. UCN_P is true iff this bidi character was written as a UCN. */
1476+
1477+static void
1478+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
1479+ bool ucn_p)
1480+{
1481+ if (__builtin_expect (kind == bidi::kind::NONE, 1))
1482+ return;
1483+
1484+ const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1485+
1486+ if (warn_bidi != bidirectional_none)
1487+ {
1488+ const location_t loc
1489+ = linemap_position_for_column (pfile->line_table,
1490+ CPP_BUF_COLUMN (pfile->buffer, p));
1491+ /* It seems excessive to warn about a PDI/PDF that is closing
1492+ an opened context because we've already warned about the
1493+ opening character. Except warn when we have a UCN x UTF-8
1494+ mismatch. */
1495+ if (kind == bidi::current_ctx ())
1496+ {
1497+ if (warn_bidi == bidirectional_unpaired
1498+ && bidi::current_ctx_ucn_p () != ucn_p)
1499+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
1500+ "UTF-8 vs UCN mismatch when closing "
1501+ "a context by \"%s\"", bidi::to_str (kind));
1502+ }
1503+ else if (warn_bidi == bidirectional_any)
1504+ {
1505+ if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
1506+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
1507+ "\"%s\" is closing an unopened context",
1508+ bidi::to_str (kind));
1509+ else
1510+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
1511+ "found problematic Unicode character \"%s\"",
1512+ bidi::to_str (kind));
1513+ }
1514+ }
1515+ /* Update the vector of open bidi contexts for this character. */
1516+ bidi::on_char (kind, ucn_p);
1517+}
1518+
1519 /* Skip a C-style block comment. We find the end of the comment by
1520 seeing if an asterisk is before every '/' we encounter. Returns
1521 nonzero if comment terminated by EOF, zero otherwise.
1522@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi
1523 cpp_buffer *buffer = pfile->buffer;
1524 const uchar *cur = buffer->cur;
1525 uchar c;
1526+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1527
1528 cur++;
1529 if (*cur == '/')
1530@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi
1531 if (c == '/')
1532 {
1533 if (cur[-2] == '*')
1534- break;
1535+ {
1536+ if (warn_bidi_p)
1537+ maybe_warn_bidi_on_close (pfile, cur);
1538+ break;
1539+ }
1540
1541 /* Warn about potential nested comments, but not if the '/'
1542 comes immediately before the true comment delimiter.
1543@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi
1544 {
1545 unsigned int cols;
1546 buffer->cur = cur - 1;
1547+ if (warn_bidi_p)
1548+ maybe_warn_bidi_on_close (pfile, cur);
1549 _cpp_process_line_notes (pfile, true);
1550 if (buffer->next_line >= buffer->rlimit)
1551 return true;
1552@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi
1553
1554 cur = buffer->cur;
1555 }
1556+ /* If this is the beginning of a UTF-8 encoding, it might be
1557+ a bidirectional control character. */
1558+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
1559+ {
1560+ bidi::kind kind = get_bidi_utf8 (cur - 1);
1561+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
1562+ }
1563 }
1564
1565 buffer->cur = cur;
1566@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
1567 {
1568 cpp_buffer *buffer = pfile->buffer;
1569 location_t orig_line = pfile->line_table->highest_line;
1570+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1571
1572- while (*buffer->cur != '\n')
1573- buffer->cur++;
1574+ if (!warn_bidi_p)
1575+ while (*buffer->cur != '\n')
1576+ buffer->cur++;
1577+ else
1578+ {
1579+ while (*buffer->cur != '\n'
1580+ && *buffer->cur != bidi::utf8_start)
1581+ buffer->cur++;
1582+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1583+ {
1584+ while (*buffer->cur != '\n')
1585+ {
1586+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1587+ {
1588+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
1589+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
1590+ /*ucn_p=*/false);
1591+ }
1592+ buffer->cur++;
1593+ }
1594+ maybe_warn_bidi_on_close (pfile, buffer->cur);
1595+ }
1596+ }
1597
1598 _cpp_process_line_notes (pfile, true);
1599 return orig_line != pfile->line_table->highest_line;
1600@@ -1343,11 +1697,13 @@ static const cppchar_t utf8_signifier =
1601
1602 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1603 an identifier. FIRST is TRUE if this starts an identifier. */
1604+
1605 static bool
1606 forms_identifier_p (cpp_reader *pfile, int first,
1607 struct normalize_state *state)
1608 {
1609 cpp_buffer *buffer = pfile->buffer;
1610+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1611
1612 if (*buffer->cur == '$')
1613 {
1614@@ -1370,6 +1726,13 @@ forms_identifier_p (cpp_reader *pfile, i
1615 cppchar_t s;
1616 if (*buffer->cur >= utf8_signifier)
1617 {
1618+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
1619+ && warn_bidi_p)
1620+ {
1621+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
1622+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
1623+ /*ucn_p=*/false);
1624+ }
1625 if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1626 state, &s))
1627 return true;
1628@@ -1378,6 +1741,13 @@ forms_identifier_p (cpp_reader *pfile, i
1629 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1630 {
1631 buffer->cur += 2;
1632+ if (warn_bidi_p)
1633+ {
1634+ bidi::kind kind = get_bidi_ucn (buffer->cur,
1635+ buffer->cur[-1] == 'U');
1636+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
1637+ /*ucn_p=*/true);
1638+ }
1639 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1640 state, &s, NULL, NULL))
1641 return true;
1642@@ -1486,6 +1856,7 @@ lex_identifier (cpp_reader *pfile, const
1643 const uchar *cur;
1644 unsigned int len;
1645 unsigned int hash = HT_HASHSTEP (0, *base);
1646+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1647
1648 cur = pfile->buffer->cur;
1649 if (! starts_ucn)
1650@@ -1509,6 +1880,8 @@ lex_identifier (cpp_reader *pfile, const
1651 pfile->buffer->cur++;
1652 }
1653 } while (forms_identifier_p (pfile, false, nst));
1654+ if (warn_bidi_p)
1655+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
1656 result = _cpp_interpret_identifier (pfile, base,
1657 pfile->buffer->cur - base);
1658 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1659@@ -1697,6 +2070,7 @@ lex_raw_string (cpp_reader *pfile, cpp_t
1660 {
1661 uchar raw_prefix[17];
1662 uchar temp_buffer[18];
1663+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1664 const uchar *orig_base;
1665 unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1666 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1667@@ -1946,8 +2320,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t
1668 cur = base = pfile->buffer->cur;
1669 note = &pfile->buffer->notes[pfile->buffer->cur_note];
1670 }
1671+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
1672+ && warn_bidi_p)
1673+ maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
1674+ /*ucn_p=*/false);
1675 }
1676
1677+ if (warn_bidi_p)
1678+ maybe_warn_bidi_on_close (pfile, pos);
1679+
1680 if (CPP_OPTION (pfile, user_literals))
1681 {
1682 /* If a string format macro, say from inttypes.h, is placed touching
1683@@ -2042,15 +2423,27 @@ lex_string (cpp_reader *pfile, cpp_token
1684 else
1685 terminator = '>', type = CPP_HEADER_NAME;
1686
1687+ const bool warn_bidi_p = pfile->warn_bidi_p ();
1688 for (;;)
1689 {
1690 cppchar_t c = *cur++;
1691
1692 /* In #include-style directives, terminators are not escapable. */
1693 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1694- cur++;
1695+ {
1696+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
1697+ {
1698+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
1699+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
1700+ }
1701+ cur++;
1702+ }
1703 else if (c == terminator)
1704- break;
1705+ {
1706+ if (warn_bidi_p)
1707+ maybe_warn_bidi_on_close (pfile, cur - 1);
1708+ break;
1709+ }
1710 else if (c == '\n')
1711 {
1712 cur--;
1713@@ -2067,6 +2460,11 @@ lex_string (cpp_reader *pfile, cpp_token
1714 }
1715 else if (c == '\0')
1716 saw_NUL = true;
1717+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
1718+ {
1719+ bidi::kind kind = get_bidi_utf8 (cur - 1);
1720+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
1721+ }
1722 }
1723
1724 if (saw_NUL && !pfile->state.skipping)