diff options
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch')
-rw-r--r-- | meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch | 1765 |
1 files changed, 1765 insertions, 0 deletions
diff --git a/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch b/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch new file mode 100644 index 0000000000..9bad81d4d0 --- /dev/null +++ b/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch | |||
@@ -0,0 +1,1765 @@ | |||
1 | From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001 | ||
2 | From: Marek Polacek <polacek@redhat.com> | ||
3 | Date: Wed, 6 Oct 2021 14:33:59 -0400 | ||
4 | Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] | ||
5 | |||
6 | From a link below: | ||
7 | "An issue was discovered in the Bidirectional Algorithm in the Unicode | ||
8 | Specification through 14.0. It permits the visual reordering of | ||
9 | characters via control sequences, which can be used to craft source code | ||
10 | that renders different logic than the logical ordering of tokens | ||
11 | ingested by compilers and interpreters. Adversaries can leverage this to | ||
12 | encode source code for compilers accepting Unicode such that targeted | ||
13 | vulnerabilities are introduced invisibly to human reviewers." | ||
14 | |||
15 | More info: | ||
16 | https://nvd.nist.gov/vuln/detail/CVE-2021-42574 | ||
17 | https://trojansource.codes/ | ||
18 | |||
19 | This is not a compiler bug. However, to mitigate the problem, this patch | ||
20 | implements -Wbidi-chars=[none|unpaired|any] to warn about possibly | ||
21 | misleading Unicode bidirectional control characters the preprocessor may | ||
22 | encounter. | ||
23 | |||
24 | The default is =unpaired, which warns about improperly terminated | ||
25 | bidirectional control characters; e.g. a LRE without its corresponding PDF. | ||
26 | The level =any warns about any use of bidirectional control characters. | ||
27 | |||
28 | This patch handles both UCNs and UTF-8 characters. UCNs designating | ||
29 | bidi characters in identifiers are accepted since r204886. Then r217144 | ||
30 | enabled -fextended-identifiers by default. Extended characters in C/C++ | ||
31 | identifiers have been accepted since r275979. However, this patch still | ||
32 | warns about mixing UTF-8 and UCN bidi characters; there seems to be no | ||
33 | good reason to allow mixing them. | ||
34 | |||
35 | We warn in different contexts: comments (both C and C++-style), string | ||
36 | literals, character constants, and identifiers. Expectedly, UCNs are ignored | ||
37 | in comments and raw string literals. The bidirectional control characters | ||
38 | can nest so this patch handles that as well. | ||
39 | |||
40 | I have neither included nor tested this at all with Fortran (which also has | |
41 | string literals and line comments). | ||
42 | |||
43 | Dave M. posted patches improving diagnostics involving Unicode characters. | |
44 | This patch does not make use of this new infrastructure yet. | ||
45 | |||
46 | PR preprocessor/103026 | ||
47 | |||
48 | gcc/c-family/ChangeLog: | ||
49 | |||
50 | * c.opt (Wbidi-chars, Wbidi-chars=): New option. | ||
51 | |||
52 | gcc/ChangeLog: | ||
53 | |||
54 | * doc/invoke.texi: Document -Wbidi-chars. | ||
55 | |||
56 | libcpp/ChangeLog: | ||
57 | |||
58 | * include/cpplib.h (enum cpp_bidirectional_level): New. | ||
59 | (struct cpp_options): Add cpp_warn_bidirectional. | ||
60 | (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. | ||
61 | * internal.h (struct cpp_reader): Add warn_bidi_p member | ||
62 | function. | ||
63 | * init.c (cpp_create_reader): Set cpp_warn_bidirectional. | ||
64 | * lex.c (bidi): New namespace. | ||
65 | (get_bidi_utf8): New function. | ||
66 | (get_bidi_ucn): Likewise. | ||
67 | (maybe_warn_bidi_on_close): Likewise. | ||
68 | (maybe_warn_bidi_on_char): Likewise. | ||
69 | (_cpp_skip_block_comment): Implement warning about bidirectional | ||
70 | control characters. | ||
71 | (skip_line_comment): Likewise. | ||
72 | (forms_identifier_p): Likewise. | ||
73 | (lex_identifier): Likewise. | ||
74 | (lex_string): Likewise. | ||
75 | (lex_raw_string): Likewise. | ||
76 | |||
77 | gcc/testsuite/ChangeLog: | ||
78 | |||
79 | * c-c++-common/Wbidi-chars-1.c: New test. | ||
80 | * c-c++-common/Wbidi-chars-2.c: New test. | ||
81 | * c-c++-common/Wbidi-chars-3.c: New test. | ||
82 | * c-c++-common/Wbidi-chars-4.c: New test. | ||
83 | * c-c++-common/Wbidi-chars-5.c: New test. | ||
84 | * c-c++-common/Wbidi-chars-6.c: New test. | ||
85 | * c-c++-common/Wbidi-chars-7.c: New test. | ||
86 | * c-c++-common/Wbidi-chars-8.c: New test. | ||
87 | * c-c++-common/Wbidi-chars-9.c: New test. | ||
88 | * c-c++-common/Wbidi-chars-10.c: New test. | ||
89 | * c-c++-common/Wbidi-chars-11.c: New test. | ||
90 | * c-c++-common/Wbidi-chars-12.c: New test. | ||
91 | * c-c++-common/Wbidi-chars-13.c: New test. | ||
92 | * c-c++-common/Wbidi-chars-14.c: New test. | ||
93 | * c-c++-common/Wbidi-chars-15.c: New test. | ||
94 | * c-c++-common/Wbidi-chars-16.c: New test. | ||
95 | * c-c++-common/Wbidi-chars-17.c: New test. | ||
96 | |||
97 | CVE: CVE-2021-42574 | ||
98 | Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628] | ||
99 | Signed-off-by: Pgowda <pgowda.cve@gmail.com> | ||
100 | |||
101 | --- | ||
102 | gcc/c-family/c.opt | 24 ++ | ||
103 | gcc/doc/invoke.texi | 21 +- | ||
104 | gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 + | ||
105 | gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++ | ||
106 | gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 + | ||
107 | gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 + | ||
108 | gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 + | ||
109 | gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++ | ||
110 | gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++ | ||
111 | gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++ | ||
112 | gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++ | ||
113 | gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 + | ||
114 | gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 + | ||
115 | gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++ | ||
116 | gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++ | ||
117 | gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++ | ||
118 | gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 + | ||
119 | gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 + | ||
120 | gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++ | ||
121 | libcpp/include/cpplib.h | 18 +- | ||
122 | libcpp/init.c | 1 + | ||
123 | libcpp/internal.h | 7 + | ||
124 | libcpp/lex.c | 408 +++++++++++++++++++- | ||
125 | 23 files changed, 1315 insertions(+), 7 deletions(-) | ||
126 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c | ||
127 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c | ||
128 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c | ||
129 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c | ||
130 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c | ||
131 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c | ||
132 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c | ||
133 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c | ||
134 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c | ||
135 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c | ||
136 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c | ||
137 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c | ||
138 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c | ||
139 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c | ||
140 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c | ||
141 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c | ||
142 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c | ||
143 | |||
144 | diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt | ||
145 | index 8a4cd634f77..3976fc368db 100644 | ||
146 | --- a/gcc/c-family/c.opt | ||
147 | +++ b/gcc/c-family/c.opt | ||
148 | @@ -370,6 +370,30 @@ Wbad-function-cast | ||
149 | C ObjC Var(warn_bad_function_cast) Warning | ||
150 | Warn about casting functions to incompatible types. | ||
151 | |||
152 | +Wbidi-chars | ||
153 | +C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) | ||
154 | +; | ||
155 | + | ||
156 | +Wbidi-chars= | ||
157 | +C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) | ||
158 | +-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. | ||
159 | + | ||
160 | +; Required for these enum values. | ||
161 | +SourceInclude | ||
162 | +cpplib.h | ||
163 | + | ||
164 | +Enum | ||
165 | +Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) | ||
166 | + | ||
167 | +EnumValue | ||
168 | +Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) | ||
169 | + | ||
170 | +EnumValue | ||
171 | +Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) | ||
172 | + | ||
173 | +EnumValue | ||
174 | +Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) | ||
175 | + | ||
176 | Wbool-compare | ||
177 | C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) | ||
178 | Warn about boolean expression compared with an integer value different from true/false. | ||
179 | diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi | ||
180 | index 6070288856c..a22758d18ee 100644 | ||
181 | --- a/gcc/doc/invoke.texi | ||
182 | +++ b/gcc/doc/invoke.texi | ||
183 | @@ -326,7 +326,9 @@ Objective-C and Objective-C++ Dialects}. | ||
184 | -Warith-conversion @gol | ||
185 | -Warray-bounds -Warray-bounds=@var{n} @gol | ||
186 | -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol | ||
187 | --Wno-attribute-warning -Wbool-compare -Wbool-operation @gol | ||
188 | +-Wno-attribute-warning @gol | ||
189 | +-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol | ||
190 | +-Wbool-compare -Wbool-operation @gol | ||
191 | -Wno-builtin-declaration-mismatch @gol | ||
192 | -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol | ||
193 | -Wc11-c2x-compat @gol | ||
194 | @@ -7559,6 +7561,23 @@ Attributes considered include @code{allo | ||
195 | This is the default. You can disable these warnings with either | ||
196 | @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. | ||
197 | |||
198 | +@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} | ||
199 | +@opindex Wbidi-chars= | ||
200 | +@opindex Wbidi-chars | ||
201 | +@opindex Wno-bidi-chars | ||
202 | +Warn about possibly misleading UTF-8 bidirectional control characters in | ||
203 | +comments, string literals, character constants, and identifiers. Such | ||
204 | +characters can change left-to-right writing direction into right-to-left | ||
205 | +(and vice versa), which can cause confusion between the logical order and | ||
206 | +visual order. This may be dangerous; for instance, it may seem that a piece | ||
207 | +of code is not commented out, whereas it in fact is. | ||
208 | + | ||
209 | +There are three levels of warning supported by GCC@. The default is | ||
210 | +@option{-Wbidi-chars=unpaired}, which warns about improperly terminated | ||
211 | +bidi contexts. @option{-Wbidi-chars=none} turns the warning off. | ||
212 | +@option{-Wbidi-chars=any} warns about any use of bidirectional control | ||
213 | +characters. | ||
214 | + | ||
215 | @item -Wbool-compare | ||
216 | @opindex Wno-bool-compare | ||
217 | @opindex Wbool-compare | ||
218 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c | ||
219 | new file mode 100644 | ||
220 | index 00000000000..34f5ac19271 | ||
221 | --- /dev/null | ||
222 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c | ||
223 | @@ -0,0 +1,27 @@ | ||
224 | +/* PR preprocessor/103026 */ | ||
225 | +/* { dg-do compile } */ | ||
226 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
227 | +/* More nesting testing. */ | ||
228 | + | ||
229 | +/* RLE‫ LRI⁦ PDF‬ PDI⁩*/ | |
230 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
231 | +int LRE_\u202a_PDF_\u202c; | ||
232 | +int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; | ||
233 | +int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; | ||
234 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
235 | +int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; | ||
236 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
237 | +int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; | ||
238 | +int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; | ||
239 | +int FSI_\u2068; | ||
240 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
241 | +int FSI_\u2068_PDI_\u2069; | ||
242 | +int FSI_\u2068_FSI_\u2068_PDI_\u2069; | ||
243 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
244 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; | ||
245 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; | ||
246 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
247 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; | ||
248 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
249 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; | ||
250 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
251 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c | ||
252 | new file mode 100644 | ||
253 | index 00000000000..270ce2368a9 | ||
254 | --- /dev/null | ||
255 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c | ||
256 | @@ -0,0 +1,13 @@ | ||
257 | +/* PR preprocessor/103026 */ | ||
258 | +/* { dg-do compile } */ | ||
259 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
260 | +/* Test that we warn when mixing UCN and UTF-8. */ | ||
261 | + | ||
262 | +int LRE_‪_PDF_\u202c; | |
263 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ | |
264 | +int LRE_\u202a_PDF_‬_; | |
265 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ | |
266 | +const char *s1 = "LRE_‪_PDF_\u202c"; | |
267 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ | |
268 | +const char *s2 = "LRE_\u202a_PDF_‬"; | |
269 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ | ||
270 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c | ||
271 | new file mode 100644 | ||
272 | index 00000000000..b07eec1da91 | ||
273 | --- /dev/null | ||
274 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c | ||
275 | @@ -0,0 +1,19 @@ | ||
276 | +/* PR preprocessor/103026 */ | ||
277 | +/* { dg-do compile { target { c || c++11 } } } */ | ||
278 | +/* { dg-options "-Wbidi-chars=any" } */ | ||
279 | +/* Test raw strings. */ | ||
280 | + | ||
281 | +const char *s1 = R"(a b c LRE⪠1 2 3 PDF⬠x y z)"; | ||
282 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
283 | +const char *s2 = R"(a b c RLE⫠1 2 3 PDF⬠x y z)"; | ||
284 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
285 | +const char *s3 = R"(a b c LROâ 1 2 3 PDF⬠x y z)"; | ||
286 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
287 | +const char *s4 = R"(a b c RLO⮠1 2 3 PDF⬠x y z)"; | ||
288 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
289 | +const char *s7 = R"(a b c FSI⨠1 2 3 PDI⩠x y) z"; | ||
290 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
291 | +const char *s8 = R"(a b c PDIâ© x y )z"; | ||
292 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
293 | +const char *s9 = R"(a b c PDF⬠x y z)"; | ||
294 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
295 | diff -uprN '-x*.orig' '-x*.rej' a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c | |
296 | --- a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800 | |
297 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-13 23:11:22.328439287 -0800 | |
298 | @@ -0,0 +1,17 @@ | ||
299 | +/* PR preprocessor/103026 */ | ||
300 | +/* { dg-do compile { target { c || c++11 } } } */ | ||
301 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
302 | +/* Test raw strings. */ | ||
303 | + | ||
304 | +const char *s1 = R"(a b c LRE⪠1 2 3)"; | ||
305 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
306 | +const char *s2 = R"(a b c RLEâ« 1 2 3)"; | ||
307 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
308 | +const char *s3 = R"(a b c LROâ 1 2 3)"; | ||
309 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
310 | +const char *s4 = R"(a b c FSI⨠1 2 3)"; | ||
311 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
312 | +const char *s5 = R"(a b c LRI⦠1 2 3)"; | ||
313 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
314 | +const char *s6 = R"(a b c RLI⧠1 2 3)"; | ||
315 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
316 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c | ||
317 | new file mode 100644 | ||
318 | index 00000000000..ba5f75d9553 | ||
319 | --- /dev/null | ||
320 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c | ||
321 | @@ -0,0 +1,38 @@ | ||
322 | +/* PR preprocessor/103026 */ | ||
323 | +/* { dg-do compile } */ | ||
324 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
325 | +/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, | ||
326 | + or RLOs. */ | ||
327 | + | ||
328 | +/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/ | |
329 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | |
330 | +// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩ | |
331 | +// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩ | |
332 | +// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩ | |
333 | +// FSI_⁨_RLO_‮_PDI_⁩ | |
334 | +// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩ | |
335 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
336 | + | ||
337 | +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; | ||
338 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
339 | +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; | ||
340 | +int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; | ||
341 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
342 | +int PDI_\u2069; | ||
343 | +int LRI_\u2066_PDI_\u2069; | ||
344 | +int RLI_\u2067_PDI_\u2069; | ||
345 | +int LRE_\u202a_LRI_\u2066_PDI_\u2069; | ||
346 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
347 | +int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; | ||
348 | +int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; | ||
349 | +int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; | ||
350 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
351 | +int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; | ||
352 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
353 | +int RLO_\u202e_PDI_\u2069; | ||
354 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
355 | +int RLI_\u2067_PDI_\u2069_RLI_\u2067; | ||
356 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
357 | +int FSI_\u2068_PDF_\u202c_PDI_\u2069; | ||
358 | +int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; | ||
359 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
360 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c | ||
361 | new file mode 100644 | ||
362 | index 00000000000..a0ce8ff5e2c | ||
363 | --- /dev/null | ||
364 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c | ||
365 | @@ -0,0 +1,59 @@ | ||
366 | +/* PR preprocessor/103026 */ | ||
367 | +/* { dg-do compile } */ | ||
368 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
369 | +/* Test unpaired bidi control chars in multiline comments. */ | ||
370 | + | ||
371 | +/* | ||
372 | + * LRE⪠end | ||
373 | + */ | ||
374 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
375 | +/* | ||
376 | + * RLEâ« end | ||
377 | + */ | ||
378 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
379 | +/* | ||
380 | + * LROâ end | ||
381 | + */ | ||
382 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
383 | +/* | ||
384 | + * RLOâ® end | ||
385 | + */ | ||
386 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
387 | +/* | ||
388 | + * LRI⦠end | ||
389 | + */ | ||
390 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
391 | +/* | ||
392 | + * RLI⧠end | ||
393 | + */ | ||
394 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
395 | +/* | ||
396 | + * FSI⨠end | ||
397 | + */ | ||
398 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
399 | +/* LRE⪠| ||
400 | + PDF⬠*/ | ||
401 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
402 | +/* FSI⨠| ||
403 | + PDIâ© */ | ||
404 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
405 | + | ||
406 | +/* LRE<‪> | |
407 | + * | ||
408 | + */ | ||
409 | +/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ | ||
410 | + | ||
411 | +/* | ||
412 | + * LRE<‪> | |
413 | + */ | ||
414 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
415 | + | ||
416 | +/* | ||
417 | + * | ||
418 | + * LRE<‪> */ | |
419 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
420 | + | ||
421 | +/* RLI<⁧> */ /* PDI<⁩> */ | |
422 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | |
423 | +/* LRE<‪> */ /* PDF<‬> */ | |
424 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
425 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c | ||
426 | new file mode 100644 | ||
427 | index 00000000000..baa0159861c | ||
428 | --- /dev/null | ||
429 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c | ||
430 | @@ -0,0 +1,26 @@ | ||
431 | +/* PR preprocessor/103026 */ | ||
432 | +/* { dg-do compile } */ | ||
433 | +/* { dg-options "-Wbidi-chars=any" } */ | ||
434 | +/* Test LTR/RTL chars. */ | ||
435 | + | ||
436 | +/* LTR<‎> */ | |
437 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ | |
438 | +// LTR<‎> | |
439 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ | |
440 | +/* RTL<‏> */ | |
441 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ | |
442 | +// RTL<‏> | |
443 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ | ||
444 | + | ||
445 | +const char *s1 = "LTR<‎>"; | |
446 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ | ||
447 | +const char *s2 = "LTR\u200e"; | ||
448 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ | ||
449 | +const char *s3 = "LTR\u200E"; | ||
450 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ | ||
451 | +const char *s4 = "RTL<‏>"; | |
452 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ | ||
453 | +const char *s5 = "RTL\u200f"; | ||
454 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ | ||
455 | +const char *s6 = "RTL\u200F"; | ||
456 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ | ||
457 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c | ||
458 | new file mode 100644 | ||
459 | index 00000000000..07cb4321f96 | ||
460 | --- /dev/null | ||
461 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c | ||
462 | @@ -0,0 +1,30 @@ | ||
463 | +/* PR preprocessor/103026 */ | ||
464 | +/* { dg-do compile } */ | ||
465 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
466 | +/* Test LTR/RTL chars. */ | ||
467 | + | ||
468 | +/* LTR<‎> */ | |
469 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | |
470 | +// LTR<‎> | |
471 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | |
472 | +/* RTL<‏> */ | |
473 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | |
474 | +// RTL<‏> | |
475 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
476 | +int ltr_\u200e; | ||
477 | +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ | ||
478 | +int rtl_\u200f; | ||
479 | +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ | ||
480 | + | ||
481 | +const char *s1 = "LTR<‎>"; | |
482 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
483 | +const char *s2 = "LTR\u200e"; | ||
484 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
485 | +const char *s3 = "LTR\u200E"; | ||
486 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
487 | +const char *s4 = "RTL<‏>"; | |
488 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
489 | +const char *s5 = "RTL\u200f"; | ||
490 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
491 | +const char *s6 = "RTL\u200F"; | ||
492 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
493 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c | ||
494 | new file mode 100644 | ||
495 | index 00000000000..2340374f276 | ||
496 | --- /dev/null | ||
497 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c | ||
498 | @@ -0,0 +1,12 @@ | ||
499 | +/* PR preprocessor/103026 */ | ||
500 | +/* { dg-do compile } */ | ||
501 | + | ||
502 | +int main() { | ||
503 | + int isAdmin = 0; | ||
504 | + /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */ | |
505 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ | ||
506 | + __builtin_printf("You are an admin.\n"); | ||
507 | + /* end admins only ‮ { ⁦*/ | |
508 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ | ||
509 | + return 0; | ||
510 | +} | ||
511 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c | ||
512 | new file mode 100644 | ||
513 | index 00000000000..2340374f276 | ||
514 | --- /dev/null | ||
515 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c | ||
516 | @@ -0,0 +1,9 @@ | ||
517 | +/* PR preprocessor/103026 */ | ||
518 | +/* { dg-do compile } */ | ||
519 | + | ||
520 | +int main() { | ||
521 | + /* Say hello; newline⁧/*/ return 0 ; | |
522 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ | ||
523 | + __builtin_printf("Hello world.\n"); | ||
524 | + return 0; | ||
525 | +} | ||
526 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c | ||
527 | new file mode 100644 | ||
528 | index 00000000000..9dc7edb6e64 | ||
529 | --- /dev/null | ||
530 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c | ||
531 | @@ -0,0 +1,11 @@ | ||
532 | +/* PR preprocessor/103026 */ | ||
533 | +/* { dg-do compile } */ | ||
534 | + | ||
535 | +int main() { | ||
536 | + const char* access_level = "user"; | ||
537 | + if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) { | |
538 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ | ||
539 | + __builtin_printf("You are an admin.\n"); | ||
540 | + } | ||
541 | + return 0; | ||
542 | +} | ||
543 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c | ||
544 | new file mode 100644 | ||
545 | index 00000000000..639e5c62e88 | ||
546 | --- /dev/null | ||
547 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c | ||
548 | @@ -0,0 +1,188 @@ | ||
549 | +/* PR preprocessor/103026 */ | ||
550 | +/* { dg-do compile } */ | ||
551 | +/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ | ||
552 | +/* Test all bidi chars in various contexts (identifiers, comments, | ||
553 | + string literals, character constants), both UCN and UTF-8. The bidi | ||
554 | + chars here are properly terminated, except for the character constants. */ | ||
555 | + | ||
556 | +/* a b c LRE⪠1 2 3 PDF⬠x y z */ | ||
557 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
558 | +/* a b c RLE⫠1 2 3 PDF⬠x y z */ | ||
559 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
560 | +/* a b c LROâ 1 2 3 PDF⬠x y z */ | ||
561 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
562 | +/* a b c RLO⮠1 2 3 PDF⬠x y z */ | ||
563 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
564 | +/* a b c LRI⦠1 2 3 PDI⩠x y z */ | ||
565 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
566 | +/* a b c RLI⧠1 2 3 PDI⩠x y */ | ||
567 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
568 | +/* a b c FSI⨠1 2 3 PDI⩠x y z */ | ||
569 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
570 | + | ||
571 | +/* Same but C++ comments instead. */ | ||
572 | +// a b c LRE⪠1 2 3 PDF⬠x y z | ||
573 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
574 | +// a b c RLE⫠1 2 3 PDF⬠x y z | ||
575 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
576 | +// a b c LROâ 1 2 3 PDF⬠x y z | ||
577 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
578 | +// a b c RLO⮠1 2 3 PDF⬠x y z | ||
579 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
580 | +// a b c LRI⦠1 2 3 PDI⩠x y z | ||
581 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
582 | +// a b c RLI⧠1 2 3 PDI⩠x y | ||
583 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
584 | +// a b c FSI⨠1 2 3 PDI⩠x y z | ||
585 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
586 | + | ||
587 | +/* Here we're closing an unopened context, warn when =any. */ | ||
588 | +/* a b c PDIâ© x y z */ | ||
589 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
590 | +/* a b c PDF⬠x y z */ | ||
591 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
592 | +// a b c PDIâ© x y z | ||
593 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
594 | +// a b c PDF⬠x y z | ||
595 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
596 | + | ||
597 | +/* Multiline comments. */ | ||
598 | +/* a b c PDIâ© x y z | ||
599 | + */ | ||
600 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ | ||
601 | +/* a b c PDF⬠x y z | ||
602 | + */ | ||
603 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ | ||
604 | +/* first | ||
605 | + a b c PDIâ© x y z | ||
606 | + */ | ||
607 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ | ||
608 | +/* first | ||
609 | + a b c PDF⬠x y z | ||
610 | + */ | ||
611 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ | ||
612 | +/* first | ||
613 | + a b c PDIâ© x y z */ | ||
614 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
615 | +/* first | ||
616 | + a b c PDF⬠x y z */ | ||
617 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
618 | + | ||
619 | +void | ||
620 | +g1 () | ||
621 | +{ | ||
622 | + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; | ||
623 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
624 | + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; | ||
625 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
626 | + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; | ||
627 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
628 | + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; | ||
629 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
630 | + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; | ||
631 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
632 | + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; | ||
633 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
634 | + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; | ||
635 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
636 | + const char *s8 = "a b c PDIâ© x y z"; | ||
637 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
638 | + const char *s9 = "a b c PDF⬠x y z"; | ||
639 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
640 | + | ||
641 | + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; | ||
642 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
643 | + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; | ||
644 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
645 | + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; | ||
646 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
647 | + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; | ||
648 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
649 | + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; | ||
650 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
651 | + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; | ||
652 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
653 | + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; | ||
654 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
655 | + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; | ||
656 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
657 | + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; | ||
658 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
659 | + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; | ||
660 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
661 | + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; | ||
662 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
663 | +} | ||
664 | + | ||
665 | +void | ||
666 | +g2 () | ||
667 | +{ | ||
668 | + const char c1 = '\u202a'; | ||
669 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
670 | + const char c2 = '\u202A'; | ||
671 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
672 | + const char c3 = '\u202b'; | ||
673 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
674 | + const char c4 = '\u202B'; | ||
675 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
676 | + const char c5 = '\u202d'; | ||
677 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
678 | + const char c6 = '\u202D'; | ||
679 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
680 | + const char c7 = '\u202e'; | ||
681 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
682 | + const char c8 = '\u202E'; | ||
683 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
684 | + const char c9 = '\u2066'; | ||
685 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
686 | + const char c10 = '\u2067'; | ||
687 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
688 | + const char c11 = '\u2068'; | ||
689 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
690 | +} | ||
691 | + | ||
692 | +int aâªbâ¬c; | ||
693 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
694 | +int aâ«bâ¬c; | ||
695 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
696 | +int aâÂbâ¬c; | ||
697 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
698 | +int aâ®bâ¬c; | ||
699 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
700 | +int aâ¦bâ©c; | ||
701 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
702 | +int aâ§bâ©c; | ||
703 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
704 | +int aâ¨bâ©c; | ||
705 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
706 | +int Aâ¬X; | ||
707 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
708 | +int A\u202cY; | ||
709 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
710 | +int A\u202CY2; | ||
711 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ | ||
712 | + | ||
713 | +int d\u202ae\u202cf; | ||
714 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
715 | +int d\u202Ae\u202cf2; | ||
716 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
717 | +int d\u202be\u202cf; | ||
718 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
719 | +int d\u202Be\u202cf2; | ||
720 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ | ||
721 | +int d\u202de\u202cf; | ||
722 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
723 | +int d\u202De\u202cf2; | ||
724 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ | ||
725 | +int d\u202ee\u202cf; | ||
726 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
727 | +int d\u202Ee\u202cf2; | ||
728 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ | ||
729 | +int d\u2066e\u2069f; | ||
730 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ | ||
731 | +int d\u2067e\u2069f; | ||
732 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ | ||
733 | +int d\u2068e\u2069f; | ||
734 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ | ||
735 | +int X\u2069; | ||
736 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ | ||
737 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c | ||
738 | new file mode 100644 | ||
739 | index 00000000000..68cb053144b | ||
740 | --- /dev/null | ||
741 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c | ||
742 | @@ -0,0 +1,188 @@ | ||
743 | +/* PR preprocessor/103026 */ | ||
744 | +/* { dg-do compile } */ | ||
745 | +/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ | ||
746 | +/* Test all bidi chars in various contexts (identifiers, comments, | ||
747 | + string literals, character constants), both UCN and UTF-8. The bidi | ||
748 | + chars here are properly terminated, except for the character constants. */ | ||
749 | + | ||
750 | +/* a b c LRE⪠1 2 3 PDF⬠x y z */ | ||
751 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
752 | +/* a b c RLE⫠1 2 3 PDF⬠x y z */ | ||
753 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
754 | +/* a b c LROâ 1 2 3 PDF⬠x y z */ | ||
755 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
756 | +/* a b c RLO⮠1 2 3 PDF⬠x y z */ | ||
757 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
758 | +/* a b c LRI⦠1 2 3 PDI⩠x y z */ | ||
759 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
760 | +/* a b c RLI⧠1 2 3 PDI⩠x y */ | ||
761 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
762 | +/* a b c FSI⨠1 2 3 PDI⩠x y z */ | ||
763 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
764 | + | ||
765 | +/* Same but C++ comments instead. */ | ||
766 | +// a b c LRE⪠1 2 3 PDF⬠x y z | ||
767 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
768 | +// a b c RLE⫠1 2 3 PDF⬠x y z | ||
769 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
770 | +// a b c LROâ 1 2 3 PDF⬠x y z | ||
771 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
772 | +// a b c RLO⮠1 2 3 PDF⬠x y z | ||
773 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
774 | +// a b c LRI⦠1 2 3 PDI⩠x y z | ||
775 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
776 | +// a b c RLI⧠1 2 3 PDI⩠x y | ||
777 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
778 | +// a b c FSI⨠1 2 3 PDI⩠x y z | ||
779 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
780 | + | ||
781 | +/* Here we're closing an unopened context, warn when =any. */ | ||
782 | +/* a b c PDIâ© x y z */ | ||
783 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
784 | +/* a b c PDF⬠x y z */ | ||
785 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
786 | +// a b c PDIâ© x y z | ||
787 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
788 | +// a b c PDF⬠x y z | ||
789 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
790 | + | ||
791 | +/* Multiline comments. */ | ||
792 | +/* a b c PDIâ© x y z | ||
793 | + */ | ||
794 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ | ||
795 | +/* a b c PDF⬠x y z | ||
796 | + */ | ||
797 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ | ||
798 | +/* first | ||
799 | + a b c PDIâ© x y z | ||
800 | + */ | ||
801 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ | ||
802 | +/* first | ||
803 | + a b c PDF⬠x y z | ||
804 | + */ | ||
805 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ | ||
806 | +/* first | ||
807 | + a b c PDIâ© x y z */ | ||
808 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
809 | +/* first | ||
810 | + a b c PDF⬠x y z */ | ||
811 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
812 | + | ||
813 | +void | ||
814 | +g1 () | ||
815 | +{ | ||
816 | + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; | ||
817 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
818 | + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; | ||
819 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
820 | + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; | ||
821 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
822 | + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; | ||
823 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
824 | + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; | ||
825 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
826 | + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; | ||
827 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
828 | + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; | ||
829 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
830 | + const char *s8 = "a b c PDIâ© x y z"; | ||
831 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
832 | + const char *s9 = "a b c PDF⬠x y z"; | ||
833 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
834 | + | ||
835 | + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; | ||
836 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
837 | + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; | ||
838 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
839 | + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; | ||
840 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
841 | + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; | ||
842 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
843 | + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; | ||
844 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
845 | + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; | ||
846 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
847 | + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; | ||
848 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
849 | + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; | ||
850 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
851 | + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; | ||
852 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
853 | + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; | ||
854 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
855 | + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; | ||
856 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
857 | +} | ||
858 | + | ||
859 | +void | ||
860 | +g2 () | ||
861 | +{ | ||
862 | + const char c1 = '\u202a'; | ||
863 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
864 | + const char c2 = '\u202A'; | ||
865 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
866 | + const char c3 = '\u202b'; | ||
867 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
868 | + const char c4 = '\u202B'; | ||
869 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
870 | + const char c5 = '\u202d'; | ||
871 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
872 | + const char c6 = '\u202D'; | ||
873 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
874 | + const char c7 = '\u202e'; | ||
875 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
876 | + const char c8 = '\u202E'; | ||
877 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
878 | + const char c9 = '\u2066'; | ||
879 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
880 | + const char c10 = '\u2067'; | ||
881 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
882 | + const char c11 = '\u2068'; | ||
883 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
884 | +} | ||
885 | + | ||
886 | +int aâªbâ¬c; | ||
887 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
888 | +int aâ«bâ¬c; | ||
889 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
890 | +int aâÂbâ¬c; | ||
891 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
892 | +int aâ®bâ¬c; | ||
893 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
894 | +int aâ¦bâ©c; | ||
895 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
896 | +int aâ§bâ©c; | ||
897 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
898 | +int aâ¨bâ©c; | ||
899 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
900 | +int Aâ¬X; | ||
901 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
902 | +int A\u202cY; | ||
903 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
904 | +int A\u202CY2; | ||
905 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
906 | + | ||
907 | +int d\u202ae\u202cf; | ||
908 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
909 | +int d\u202Ae\u202cf2; | ||
910 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
911 | +int d\u202be\u202cf; | ||
912 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
913 | +int d\u202Be\u202cf2; | ||
914 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
915 | +int d\u202de\u202cf; | ||
916 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
917 | +int d\u202De\u202cf2; | ||
918 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
919 | +int d\u202ee\u202cf; | ||
920 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
921 | +int d\u202Ee\u202cf2; | ||
922 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
923 | +int d\u2066e\u2069f; | ||
924 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
925 | +int d\u2067e\u2069f; | ||
926 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
927 | +int d\u2068e\u2069f; | ||
928 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
929 | +int X\u2069; | ||
930 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ | ||
931 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c | ||
932 | new file mode 100644 | ||
933 | index 00000000000..0ce6fff2dee | ||
934 | --- /dev/null | ||
935 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c | ||
936 | @@ -0,0 +1,155 @@ | ||
937 | +/* PR preprocessor/103026 */ | ||
938 | +/* { dg-do compile } */ | ||
939 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
940 | +/* Test nesting of bidi chars in various contexts. */ | ||
941 | + | ||
942 | +/* Terminated by the wrong char: */ | ||
943 | +/* a b c LRE⪠1 2 3 PDI⩠x y z */ | ||
944 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
945 | +/* a b c RLEâ« 1 2 3 PDIâ© x y z*/ | ||
946 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
947 | +/* a b c LROâ 1 2 3 PDIâ© x y z */ | ||
948 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
949 | +/* a b c RLOâ® 1 2 3 PDIâ© x y z */ | ||
950 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
951 | +/* a b c LRI⦠1 2 3 PDF⬠x y z */ | ||
952 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
953 | +/* a b c RLI⧠1 2 3 PDF⬠x y z */ | ||
954 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
955 | +/* a b c FSI⨠1 2 3 PDF⬠x y z*/ | ||
956 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
957 | + | ||
958 | +/* LRE⪠PDF⬠*/ | ||
959 | +/* LRE⪠LRE⪠PDF⬠PDF⬠*/ | ||
960 | +/* PDF⬠LRE⪠PDF⬠*/ | ||
961 | +/* LRE⪠PDF⬠LRE⪠PDF⬠*/ | ||
962 | +/* LRE⪠LRE⪠PDF⬠*/ | ||
963 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
964 | +/* PDF⬠LRE⪠*/ | ||
965 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
966 | + | ||
967 | +// a b c LRE⪠1 2 3 PDI⩠x y z | ||
968 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
969 | +// a b c RLEâ« 1 2 3 PDIâ© x y z*/ | ||
970 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
971 | +// a b c LROâ 1 2 3 PDIâ© x y z | ||
972 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
973 | +// a b c RLOâ® 1 2 3 PDIâ© x y z | ||
974 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
975 | +// a b c LRI⦠1 2 3 PDF⬠x y z | ||
976 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
977 | +// a b c RLI⧠1 2 3 PDF⬠x y z | ||
978 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
979 | +// a b c FSI⨠1 2 3 PDF⬠x y z | ||
980 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
981 | + | ||
982 | +// LRE⪠PDF⬠| ||
983 | +// LRE⪠LRE⪠PDF⬠PDF⬠| ||
984 | +// PDF⬠LRE⪠PDF⬠| ||
985 | +// LRE⪠PDF⬠LRE⪠PDF⬠| ||
986 | +// LRE⪠LRE⪠PDF⬠| ||
987 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
988 | +// PDF⬠LRE⪠| ||
989 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
990 | + | ||
991 | +void | ||
992 | +g1 () | ||
993 | +{ | ||
994 | + const char *s1 = "a b c LRE⪠1 2 3 PDI⩠x y z"; | ||
995 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
996 | + const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; | ||
997 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
998 | + const char *s3 = "a b c RLEâ« 1 2 3 PDIâ© x y "; | ||
999 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1000 | + const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; | ||
1001 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1002 | + const char *s5 = "a b c LROâ 1 2 3 PDIâ© x y z"; | ||
1003 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1004 | + const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; | ||
1005 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1006 | + const char *s7 = "a b c RLOâ® 1 2 3 PDIâ© x y z"; | ||
1007 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1008 | + const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; | ||
1009 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1010 | + const char *s9 = "a b c LRI⦠1 2 3 PDF⬠x y z"; | ||
1011 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1012 | + const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; | ||
1013 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1014 | + const char *s11 = "a b c RLI⧠1 2 3 PDF⬠x y z\ | ||
1015 | + "; | ||
1016 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
1017 | + const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; | ||
1018 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1019 | + const char *s13 = "a b c FSI⨠1 2 3 PDF⬠x y z"; | ||
1020 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1021 | + const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; | ||
1022 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1023 | + const char *s15 = "PDF⬠LREâª"; | ||
1024 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1025 | + const char *s16 = "PDF\u202c LRE\u202a"; | ||
1026 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1027 | + const char *s17 = "LRE⪠PDFâ¬"; | ||
1028 | + const char *s18 = "LRE\u202a PDF\u202c"; | ||
1029 | + const char *s19 = "LRE⪠LRE⪠PDF⬠PDFâ¬"; | ||
1030 | + const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; | ||
1031 | + const char *s21 = "PDF⬠LRE⪠PDFâ¬"; | ||
1032 | + const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; | ||
1033 | + const char *s23 = "LRE⪠LRE⪠PDFâ¬"; | ||
1034 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1035 | + const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; | ||
1036 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1037 | + const char *s25 = "PDF⬠LREâª"; | ||
1038 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1039 | + const char *s26 = "PDF\u202c LRE\u202a"; | ||
1040 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1041 | + const char *s27 = "PDF⬠LRE\u202a"; | ||
1042 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1043 | + const char *s28 = "PDF\u202c LREâª"; | ||
1044 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1045 | +} | ||
1046 | + | ||
1047 | +int aLREâªbPDIâ©; | ||
1048 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1049 | +int A\u202aB\u2069C; | ||
1050 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1051 | +int aRLEâ«bPDIâ©; | ||
1052 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1053 | +int a\u202bB\u2069c; | ||
1054 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1055 | +int aLROâÂbPDIâ©; | ||
1056 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1057 | +int a\u202db\u2069c2; | ||
1058 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1059 | +int aRLOâ®bPDIâ©; | ||
1060 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1061 | +int a\u202eb\u2069; | ||
1062 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1063 | +int aLRIâ¦bPDFâ¬; | ||
1064 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1065 | +int a\u2066b\u202c; | ||
1066 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1067 | +int aRLIâ§bPDFâ¬c | ||
1068 | +; | ||
1069 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ | ||
1070 | +int a\u2067b\u202c; | ||
1071 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1072 | +int aFSIâ¨bPDFâ¬; | ||
1073 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1074 | +int a\u2068b\u202c; | ||
1075 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1076 | +int aFSIâ¨bPD\u202C; | ||
1077 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1078 | +int aFSI\u2068bPDFâ¬_; | ||
1079 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1080 | +int aLREâªbPDFâ¬b; | ||
1081 | +int A\u202aB\u202c; | ||
1082 | +int a_LREâª_LREâª_b_PDFâ¬_PDFâ¬; | ||
1083 | +int A\u202aA\u202aB\u202cB\u202c; | ||
1084 | +int aPDFâ¬bLREadPDFâ¬; | ||
1085 | +int a_\u202C_\u202a_\u202c; | ||
1086 | +int a_LREâª_b_PDFâ¬_c_LREâª_PDFâ¬; | ||
1087 | +int a_\u202a_\u202c_\u202a_\u202c_; | ||
1088 | +int a_LREâª_b_PDFâ¬_c_LREâª; | ||
1089 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1090 | +int a_\u202a_\u202c_\u202a_; | ||
1091 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1092 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c | ||
1093 | new file mode 100644 | ||
1094 | index 00000000000..d012d420ec0 | ||
1095 | --- /dev/null | ||
1096 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c | ||
1097 | @@ -0,0 +1,9 @@ | ||
1098 | +/* PR preprocessor/103026 */ | ||
1099 | +/* { dg-do compile } */ | ||
1100 | +/* { dg-options "-Wbidi-chars=any" } */ | ||
1101 | +/* Test we ignore UCNs in comments. */ | ||
1102 | + | ||
1103 | +// a b c \u202a 1 2 3 | ||
1104 | +// a b c \u202A 1 2 3 | ||
1105 | +/* a b c \u202a 1 2 3 */ | ||
1106 | +/* a b c \u202A 1 2 3 */ | ||
1107 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c | ||
1108 | new file mode 100644 | ||
1109 | index 00000000000..4f54c5092ec | ||
1110 | --- /dev/null | ||
1111 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c | ||
1112 | @@ -0,0 +1,13 @@ | ||
1113 | +/* PR preprocessor/103026 */ | ||
1114 | +/* { dg-do compile } */ | ||
1115 | +/* { dg-options "-Wbidi-chars=any" } */ | ||
1116 | +/* Test \u vs \U. */ | ||
1117 | + | ||
1118 | +int a_\u202A; | ||
1119 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
1120 | +int a_\u202a_2; | ||
1121 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
1122 | +int a_\U0000202A_3; | ||
1123 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
1124 | +int a_\U0000202a_4; | ||
1125 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ | ||
1126 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c | ||
1127 | new file mode 100644 | ||
1128 | index 00000000000..e2af1b1ca97 | ||
1129 | --- /dev/null | ||
1130 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c | ||
1131 | @@ -0,0 +1,29 @@ | ||
1132 | +/* PR preprocessor/103026 */ | ||
1133 | +/* { dg-do compile } */ | ||
1134 | +/* { dg-options "-Wbidi-chars=unpaired" } */ | ||
1135 | +/* Test that we properly separate bidi contexts (comment/identifier/character | ||
1136 | + constant/string literal). */ | ||
1137 | + | ||
1138 | +/* LRE ->âª<- */ int pdf_\u202c_1; | ||
1139 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1140 | +/* RLE ->â«<- */ int pdf_\u202c_2; | ||
1141 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1142 | +/* LRO ->âÂ<- */ int pdf_\u202c_3; | ||
1143 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1144 | +/* RLO ->â®<- */ int pdf_\u202c_4; | ||
1145 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1146 | +/* LRI ->â¦<-*/ int pdi_\u2069_1; | ||
1147 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1148 | +/* RLI ->â§<- */ int pdi_\u2069_12; | ||
1149 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1150 | +/* FSI ->â¨<- */ int pdi_\u2069_3; | ||
1151 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1152 | + | ||
1153 | +const char *s1 = "LRE\u202a"; /* PDF ->â¬<- */ | ||
1154 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1155 | +/* LRE ->âª<- */ const char *s2 = "PDF\u202c"; | ||
1156 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1157 | +const char *s3 = "LRE\u202a"; int pdf_\u202c_5; | ||
1158 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1159 | +int lre_\u202a; const char *s4 = "PDF\u202c"; | ||
1160 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ | ||
1161 | diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h | ||
1162 | index 176f8c5bbce..112b9c24751 100644 | ||
1163 | --- a/libcpp/include/cpplib.h | ||
1164 | +++ b/libcpp/include/cpplib.h | ||
1165 | @@ -318,6 +318,17 @@ enum cpp_main_search | ||
1166 | CMS_system, /* Search the system INCLUDE path. */ | ||
1167 | }; | ||
1168 | |||
1169 | +/* The levels of bidirectional control character checking, ordered from least | ||
1170 | + restrictive to most. */ | ||
1171 | +enum cpp_bidirectional_level { | ||
1172 | + /* No checking. */ | ||
1173 | + bidirectional_none, | ||
1174 | + /* Only detect unpaired uses of bidirectional control characters. */ | ||
1175 | + bidirectional_unpaired, | ||
1176 | + /* Detect any use of bidirectional control characters. */ | ||
1177 | + bidirectional_any | ||
1178 | +}; | ||
1179 | + | ||
1180 | /* This structure is nested inside struct cpp_reader, and | ||
1181 | carries all the options visible to the command line. */ | ||
1182 | struct cpp_options | ||
1183 | @@ -531,6 +542,10 @@ struct cpp_options | ||
1184 | /* True if warn about differences between C++98 and C++11. */ | ||
1185 | bool cpp_warn_cxx11_compat; | ||
1186 | |||
1187 | + /* Nonzero if bidirectional control characters checking is on. See enum | ||
1188 | + cpp_bidirectional_level. */ | ||
1189 | + unsigned char cpp_warn_bidirectional; | ||
1190 | + | ||
1191 | /* Dependency generation. */ | ||
1192 | struct | ||
1193 | { | ||
1194 | @@ -635,7 +650,8 @@ enum cpp_warning_reason { | ||
1195 | CPP_W_C90_C99_COMPAT, | ||
1196 | CPP_W_C11_C2X_COMPAT, | ||
1197 | CPP_W_CXX11_COMPAT, | ||
1198 | - CPP_W_EXPANSION_TO_DEFINED | ||
1199 | + CPP_W_EXPANSION_TO_DEFINED, | ||
1200 | + CPP_W_BIDIRECTIONAL | ||
1201 | }; | ||
1202 | |||
1203 | /* Callback for header lookup for HEADER, which is the name of a | ||
1204 | diff --git a/libcpp/init.c b/libcpp/init.c | ||
1205 | index 5a424e23553..f9a8f5f088f 100644 | ||
1206 | --- a/libcpp/init.c | ||
1207 | +++ b/libcpp/init.c | ||
1208 | @@ -219,6 +219,7 @@ cpp_create_reader (enum c_lang lang, cpp | ||
1209 | = ENABLE_CANONICAL_SYSTEM_HEADERS; | ||
1210 | CPP_OPTION (pfile, ext_numeric_literals) = 1; | ||
1211 | CPP_OPTION (pfile, warn_date_time) = 0; | ||
1212 | + CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; | ||
1213 | |||
1214 | /* Default CPP arithmetic to something sensible for the host for the | ||
1215 | benefit of dumb users like fix-header. */ | ||
1216 | diff --git a/libcpp/internal.h b/libcpp/internal.h | ||
1217 | index 8577cab6c83..0ce0246c5a2 100644 | ||
1218 | --- a/libcpp/internal.h | ||
1219 | +++ b/libcpp/internal.h | ||
1220 | @@ -597,6 +597,13 @@ struct cpp_reader | ||
1221 | /* Location identifying the main source file -- intended to be line | ||
1222 | zero of said file. */ | ||
1223 | location_t main_loc; | ||
1224 | + | ||
1225 | + /* Returns true iff bidirectional control character checking is enabled, | ||
1226 | + i.e. the cpp_warn_bidirectional option is not bidirectional_none. */ | ||
1227 | + bool warn_bidi_p () const | ||
1228 | + { | ||
1229 | + return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; | ||
1230 | + } | ||
1231 | }; | ||
1232 | |||
1233 | /* Character classes. Based on the more primitive macros in safe-ctype.h. | ||
1234 | diff --git a/libcpp/lex.c b/libcpp/lex.c | ||
1235 | index fa2253d41c3..6a4fbce6030 100644 | ||
1236 | --- a/libcpp/lex.c | ||
1237 | +++ b/libcpp/lex.c | ||
1238 | @@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi | ||
1239 | } | ||
1240 | } | ||
1241 | |||
1242 | +namespace bidi { | ||
1243 | + enum class kind { | ||
1244 | + NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL | ||
1245 | + }; | ||
1246 | + | ||
1247 | + /* All the UTF-8 encodings of bidi characters start with E2. */ | ||
1248 | + constexpr uchar utf8_start = 0xe2; | ||
1249 | + | ||
1250 | + /* A vector holding currently open bidi contexts. Each context is one | ||
1251 | + char: bit 0 is 1 for a context that a PDF closes (LRE/RLE/LRO/RLO) and | ||
1252 | + 0 for one that a PDI closes (LRI/RLI/FSI); bit 1 is 1 if the opening | ||
1253 | + bidi character was written as a UCN, and 0 when it was UTF-8. */ | ||
1254 | + semi_embedded_vec <unsigned char, 16> vec; | ||
1255 | + | ||
1256 | + /* Close the whole comment/identifier/string literal/character constant | ||
1257 | + context by discarding every bidi context that is still open. */ | ||
1258 | + void on_close () | ||
1259 | + { | ||
1260 | + vec.truncate (0); | ||
1261 | + } | ||
1262 | + | ||
1263 | + /* Pop the last element in the vector, which must not be empty. */ | ||
1264 | + void pop () | ||
1265 | + { | ||
1266 | + unsigned int len = vec.count (); | ||
1267 | + gcc_checking_assert (len > 0); | ||
1268 | + vec.truncate (len - 1); | ||
1269 | + } | ||
1270 | + | ||
1271 | + /* Return the context of the Ith element: PDF if its LSB is 1, else PDI. */ | ||
1272 | + kind ctx_at (unsigned int i) | ||
1273 | + { | ||
1274 | + return (vec[i] & 1) ? kind::PDF : kind::PDI; | ||
1275 | + } | ||
1276 | + | ||
1277 | + /* Return the context of the last element, or NONE if no context is open. */ | ||
1278 | + kind current_ctx () | ||
1279 | + { | ||
1280 | + unsigned int len = vec.count (); | ||
1281 | + if (len == 0) | ||
1282 | + return kind::NONE; | ||
1283 | + return ctx_at (len - 1); | ||
1284 | + } | ||
1285 | + | ||
1286 | + /* Return true if the current context comes from a UCN origin, that is, | ||
1287 | + the bidi char which started this bidi context was written as a UCN. */ | ||
1288 | + bool current_ctx_ucn_p () | ||
1289 | + { | ||
1290 | + unsigned int len = vec.count (); | ||
1291 | + gcc_checking_assert (len > 0); | ||
1292 | + return (vec[len - 1] >> 1) & 1; | ||
1293 | + } | ||
1294 | + | ||
1295 | + /* We've read a bidi char, update the current vector as necessary. */ | ||
1296 | + void on_char (kind k, bool ucn_p) | ||
1297 | + { | ||
1298 | + switch (k) | ||
1299 | + { | ||
1300 | + case kind::LRE: | ||
1301 | + case kind::RLE: | ||
1302 | + case kind::LRO: | ||
1303 | + case kind::RLO: | ||
1304 | + vec.push (ucn_p ? 3u : 1u); | ||
1305 | + break; | ||
1306 | + case kind::LRI: | ||
1307 | + case kind::RLI: | ||
1308 | + case kind::FSI: | ||
1309 | + vec.push (ucn_p ? 2u : 0u); | ||
1310 | + break; | ||
1311 | + /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO | ||
1312 | + whose scope has not yet been terminated. */ | ||
1313 | + case kind::PDF: | ||
1314 | + if (current_ctx () == kind::PDF) | ||
1315 | + pop (); | ||
1316 | + break; | ||
1317 | + /* PDI terminates the scope of the last LRI, RLI, or FSI whose | ||
1318 | + scope has not yet been terminated, as well as the scopes of | ||
1319 | + any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not | ||
1320 | + yet been terminated. */ | ||
1321 | + case kind::PDI: | ||
1322 | + for (int i = vec.count () - 1; i >= 0; --i) | ||
1323 | + if (ctx_at (i) == kind::PDI) | ||
1324 | + { | ||
1325 | + vec.truncate (i); | ||
1326 | + break; | ||
1327 | + } | ||
1328 | + break; | ||
1329 | + case kind::LTR: | ||
1330 | + case kind::RTL: | ||
1331 | + /* These aren't popped by a PDF/PDI. */ | ||
1332 | + break; | ||
1333 | + [[likely]] case kind::NONE: | ||
1334 | + break; | ||
1335 | + default: | ||
1336 | + abort (); | ||
1337 | + } | ||
1338 | + } | ||
1339 | + | ||
1340 | + /* Return a descriptive string for K. */ | ||
1341 | + const char *to_str (kind k) | ||
1342 | + { | ||
1343 | + switch (k) | ||
1344 | + { | ||
1345 | + case kind::LRE: | ||
1346 | + return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; | ||
1347 | + case kind::RLE: | ||
1348 | + return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; | ||
1349 | + case kind::LRO: | ||
1350 | + return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; | ||
1351 | + case kind::RLO: | ||
1352 | + return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; | ||
1353 | + case kind::LRI: | ||
1354 | + return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; | ||
1355 | + case kind::RLI: | ||
1356 | + return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; | ||
1357 | + case kind::FSI: | ||
1358 | + return "U+2068 (FIRST STRONG ISOLATE)"; | ||
1359 | + case kind::PDF: | ||
1360 | + return "U+202C (POP DIRECTIONAL FORMATTING)"; | ||
1361 | + case kind::PDI: | ||
1362 | + return "U+2069 (POP DIRECTIONAL ISOLATE)"; | ||
1363 | + case kind::LTR: | ||
1364 | + return "U+200E (LEFT-TO-RIGHT MARK)"; | ||
1365 | + case kind::RTL: | ||
1366 | + return "U+200F (RIGHT-TO-LEFT MARK)"; | ||
1367 | + default: | ||
1368 | + abort (); | ||
1369 | + } | ||
1370 | + } | ||
1371 | +} | ||
1372 | + | ||
1373 | +/* Parse a sequence of 3 bytes starting with P (P[0] must be 0xE2) and return its bidi code, or NONE if it is not a bidi character. */ | ||
1374 | + | ||
1375 | +static bidi::kind | ||
1376 | +get_bidi_utf8 (const unsigned char *const p) | ||
1377 | +{ | ||
1378 | + gcc_checking_assert (p[0] == bidi::utf8_start); | ||
1379 | + | ||
1380 | + if (p[1] == 0x80) /* E2 80 xx: U+200E, U+200F and U+202A..U+202E. */ | ||
1381 | + switch (p[2]) | ||
1382 | + { | ||
1383 | + case 0xaa: | ||
1384 | + return bidi::kind::LRE; | ||
1385 | + case 0xab: | ||
1386 | + return bidi::kind::RLE; | ||
1387 | + case 0xac: | ||
1388 | + return bidi::kind::PDF; | ||
1389 | + case 0xad: | ||
1390 | + return bidi::kind::LRO; | ||
1391 | + case 0xae: | ||
1392 | + return bidi::kind::RLO; | ||
1393 | + case 0x8e: | ||
1394 | + return bidi::kind::LTR; | ||
1395 | + case 0x8f: | ||
1396 | + return bidi::kind::RTL; | ||
1397 | + default: | ||
1398 | + break; | ||
1399 | + } | ||
1400 | + else if (p[1] == 0x81) /* E2 81 xx: U+2066..U+2069. */ | ||
1401 | + switch (p[2]) | ||
1402 | + { | ||
1403 | + case 0xa6: | ||
1404 | + return bidi::kind::LRI; | ||
1405 | + case 0xa7: | ||
1406 | + return bidi::kind::RLI; | ||
1407 | + case 0xa8: | ||
1408 | + return bidi::kind::FSI; | ||
1409 | + case 0xa9: | ||
1410 | + return bidi::kind::PDI; | ||
1411 | + default: | ||
1412 | + break; | ||
1413 | + } | ||
1414 | + | ||
1415 | + return bidi::kind::NONE; | ||
1416 | +} | ||
1417 | + | ||
1418 | +/* Parse a UCN where P points just past \u or \U (IS_U is true for \U) and return its bidi code, or NONE if it is not a bidi character. */ | ||
1419 | + | ||
1420 | +static bidi::kind | ||
1421 | +get_bidi_ucn (const unsigned char *p, bool is_U) | ||
1422 | +{ | ||
1423 | + /* 6.4.3 Universal Character Names | ||
1424 | + \u hex-quad | ||
1425 | + \U hex-quad hex-quad | ||
1426 | + where \unnnn means \U0000nnnn. */ | ||
1427 | + | ||
1428 | + if (is_U) | ||
1429 | + { | ||
1430 | + if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') | ||
1431 | + return bidi::kind::NONE; | ||
1432 | + /* Skip the 4 leading zeros so we can treat \u and \U the same below. */ | ||
1433 | + p += 4; | ||
1434 | + } | ||
1435 | + | ||
1436 | + /* All code points we are looking for have the form 20xx. */ | ||
1437 | + if (p[0] != '2' || p[1] != '0') | ||
1438 | + return bidi::kind::NONE; | ||
1439 | + else if (p[2] == '2') /* U+202A..U+202E; the last hex digit may be either case. */ | ||
1440 | + switch (p[3]) | ||
1441 | + { | ||
1442 | + case 'a': | ||
1443 | + case 'A': | ||
1444 | + return bidi::kind::LRE; | ||
1445 | + case 'b': | ||
1446 | + case 'B': | ||
1447 | + return bidi::kind::RLE; | ||
1448 | + case 'c': | ||
1449 | + case 'C': | ||
1450 | + return bidi::kind::PDF; | ||
1451 | + case 'd': | ||
1452 | + case 'D': | ||
1453 | + return bidi::kind::LRO; | ||
1454 | + case 'e': | ||
1455 | + case 'E': | ||
1456 | + return bidi::kind::RLO; | ||
1457 | + default: | ||
1458 | + break; | ||
1459 | + } | ||
1460 | + else if (p[2] == '6') /* U+2066..U+2069. */ | ||
1461 | + switch (p[3]) | ||
1462 | + { | ||
1463 | + case '6': | ||
1464 | + return bidi::kind::LRI; | ||
1465 | + case '7': | ||
1466 | + return bidi::kind::RLI; | ||
1467 | + case '8': | ||
1468 | + return bidi::kind::FSI; | ||
1469 | + case '9': | ||
1470 | + return bidi::kind::PDI; | ||
1471 | + default: | ||
1472 | + break; | ||
1473 | + } | ||
1474 | + else if (p[2] == '0') /* U+200E and U+200F. */ | ||
1475 | + switch (p[3]) | ||
1476 | + { | ||
1477 | + case 'e': | ||
1478 | + case 'E': | ||
1479 | + return bidi::kind::LTR; | ||
1480 | + case 'f': | ||
1481 | + case 'F': | ||
1482 | + return bidi::kind::RTL; | ||
1483 | + default: | ||
1484 | + break; | ||
1485 | + } | ||
1486 | + | ||
1487 | + return bidi::kind::NONE; | ||
1488 | +} | ||
1489 | + | ||
1490 | +/* We're closing a bidi context, that is, we've encountered a newline, | ||
1491 | + are closing a C-style comment, or are at the end of a string literal, | ||
1492 | + character constant, or identifier. Warn if this context was not | ||
1493 | + properly terminated by a PDI or PDF; this is done only at the bidirectional_unpaired | ||
1494 | + warning level. P points to the last character in this context. */ | ||
1495 | + | ||
1496 | +static void | ||
1497 | +maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) | ||
1498 | +{ | ||
1499 | + if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired | ||
1500 | + && bidi::vec.count () > 0) | ||
1501 | + { | ||
1502 | + const location_t loc | ||
1503 | + = linemap_position_for_column (pfile->line_table, | ||
1504 | + CPP_BUF_COLUMN (pfile->buffer, p)); | ||
1505 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, | ||
1506 | + "unpaired UTF-8 bidirectional control character " | ||
1507 | + "detected"); | ||
1508 | + } | ||
1509 | + /* We're done with this context; forget any bidi contexts still open, whether or not we warned. */ | ||
1510 | + bidi::on_close (); | ||
1511 | +} | ||
1512 | + | ||
1513 | +/* We're at the beginning or in the middle of an identifier/comment/string | ||
1514 | + literal/character constant. Warn if we've encountered a bidi character. | ||
1515 | + KIND says which bidi character it was (nothing is done for NONE); P points to it in the character | ||
1516 | + stream. UCN_P is true iff this bidi character was written as a UCN. */ | ||
1517 | + | ||
1518 | +static void | ||
1519 | +maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, | ||
1520 | + bool ucn_p) | ||
1521 | +{ | ||
1522 | + if (__builtin_expect (kind == bidi::kind::NONE, 1)) | ||
1523 | + return; | ||
1524 | + | ||
1525 | + const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); | ||
1526 | + | ||
1527 | + if (warn_bidi != bidirectional_none) | ||
1528 | + { | ||
1529 | + const location_t loc | ||
1530 | + = linemap_position_for_column (pfile->line_table, | ||
1531 | + CPP_BUF_COLUMN (pfile->buffer, p)); | ||
1532 | + /* It seems excessive to warn about a PDI/PDF that is closing | ||
1533 | + an opened context because we've already warned about the | ||
1534 | + opening character. Except warn when we have a UCN x UTF-8 | ||
1535 | + mismatch. */ | ||
1536 | + if (kind == bidi::current_ctx ()) /* KIND is the PDF/PDI matching the innermost open context. */ | ||
1537 | + { | ||
1538 | + if (warn_bidi == bidirectional_unpaired | ||
1539 | + && bidi::current_ctx_ucn_p () != ucn_p) | ||
1540 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, | ||
1541 | + "UTF-8 vs UCN mismatch when closing " | ||
1542 | + "a context by \"%s\"", bidi::to_str (kind)); | ||
1543 | + } | ||
1544 | + else if (warn_bidi == bidirectional_any) | ||
1545 | + { | ||
1546 | + if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) | ||
1547 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, | ||
1548 | + "\"%s\" is closing an unopened context", | ||
1549 | + bidi::to_str (kind)); | ||
1550 | + else | ||
1551 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, | ||
1552 | + "found problematic Unicode character \"%s\"", | ||
1553 | + bidi::to_str (kind)); | ||
1554 | + } | ||
1555 | + } | ||
1556 | + /* Whether or not we warned, update the vector of open bidi contexts for KIND. */ | ||
1557 | + bidi::on_char (kind, ucn_p); | ||
1558 | +} | ||
1559 | + | ||
1560 | /* Skip a C-style block comment. We find the end of the comment by | ||
1561 | seeing if an asterisk is before every '/' we encounter. Returns | ||
1562 | nonzero if comment terminated by EOF, zero otherwise. | ||
1563 | @@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi | ||
1564 | cpp_buffer *buffer = pfile->buffer; | ||
1565 | const uchar *cur = buffer->cur; | ||
1566 | uchar c; | ||
1567 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1568 | |||
1569 | cur++; | ||
1570 | if (*cur == '/') | ||
1571 | @@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi | ||
1572 | if (c == '/') | ||
1573 | { | ||
1574 | if (cur[-2] == '*') | ||
1575 | - break; | ||
1576 | + { | ||
1577 | + if (warn_bidi_p) | ||
1578 | + maybe_warn_bidi_on_close (pfile, cur); | ||
1579 | + break; | ||
1580 | + } | ||
1581 | |||
1582 | /* Warn about potential nested comments, but not if the '/' | ||
1583 | comes immediately before the true comment delimiter. | ||
1584 | @@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi | ||
1585 | { | ||
1586 | unsigned int cols; | ||
1587 | buffer->cur = cur - 1; | ||
1588 | + if (warn_bidi_p) | ||
1589 | + maybe_warn_bidi_on_close (pfile, cur); | ||
1590 | _cpp_process_line_notes (pfile, true); | ||
1591 | if (buffer->next_line >= buffer->rlimit) | ||
1592 | return true; | ||
1593 | @@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi | ||
1594 | |||
1595 | cur = buffer->cur; | ||
1596 | } | ||
1597 | + /* If this is a beginning of a UTF-8 encoding, it might be | ||
1598 | + a bidirectional control character. */ | ||
1599 | + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) | ||
1600 | + { | ||
1601 | + bidi::kind kind = get_bidi_utf8 (cur - 1); | ||
1602 | + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); | ||
1603 | + } | ||
1604 | } | ||
1605 | |||
1606 | buffer->cur = cur; | ||
1607 | @@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) | ||
1608 | { | ||
1609 | cpp_buffer *buffer = pfile->buffer; | ||
1610 | location_t orig_line = pfile->line_table->highest_line; | ||
1611 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1612 | |||
1613 | - while (*buffer->cur != '\n') | ||
1614 | - buffer->cur++; | ||
1615 | + if (!warn_bidi_p) | ||
1616 | + while (*buffer->cur != '\n') | ||
1617 | + buffer->cur++; | ||
1618 | + else | ||
1619 | + { | ||
1620 | + while (*buffer->cur != '\n' | ||
1621 | + && *buffer->cur != bidi::utf8_start) | ||
1622 | + buffer->cur++; | ||
1623 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) | ||
1624 | + { | ||
1625 | + while (*buffer->cur != '\n') | ||
1626 | + { | ||
1627 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) | ||
1628 | + { | ||
1629 | + bidi::kind kind = get_bidi_utf8 (buffer->cur); | ||
1630 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, | ||
1631 | + /*ucn_p=*/false); | ||
1632 | + } | ||
1633 | + buffer->cur++; | ||
1634 | + } | ||
1635 | + maybe_warn_bidi_on_close (pfile, buffer->cur); | ||
1636 | + } | ||
1637 | + } | ||
1638 | |||
1639 | _cpp_process_line_notes (pfile, true); | ||
1640 | return orig_line != pfile->line_table->highest_line; | ||
1641 | @@ -1317,11 +1671,13 @@ static const cppchar_t utf8_signifier = | ||
1642 | |||
1643 | /* Returns TRUE if the sequence starting at buffer->cur is valid in | ||
1644 | an identifier. FIRST is TRUE if this starts an identifier. */ | ||
1645 | + | ||
1646 | static bool | ||
1647 | forms_identifier_p (cpp_reader *pfile, int first, | ||
1648 | struct normalize_state *state) | ||
1649 | { | ||
1650 | cpp_buffer *buffer = pfile->buffer; | ||
1651 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1652 | |||
1653 | if (*buffer->cur == '$') | ||
1654 | { | ||
1655 | @@ -1344,6 +1700,13 @@ forms_identifier_p (cpp_reader *pfile, i | ||
1656 | cppchar_t s; | ||
1657 | if (*buffer->cur >= utf8_signifier) | ||
1658 | { | ||
1659 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) | ||
1660 | + && warn_bidi_p) | ||
1661 | + { | ||
1662 | + bidi::kind kind = get_bidi_utf8 (buffer->cur); | ||
1663 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, | ||
1664 | + /*ucn_p=*/false); | ||
1665 | + } | ||
1666 | if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, | ||
1667 | state, &s)) | ||
1668 | return true; | ||
1669 | @@ -1352,6 +1715,13 @@ forms_identifier_p (cpp_reader *pfile, i | ||
1670 | && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) | ||
1671 | { | ||
1672 | buffer->cur += 2; | ||
1673 | + if (warn_bidi_p) | ||
1674 | + { | ||
1675 | + bidi::kind kind = get_bidi_ucn (buffer->cur, | ||
1676 | + buffer->cur[-1] == 'U'); | ||
1677 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, | ||
1678 | + /*ucn_p=*/true); | ||
1679 | + } | ||
1680 | if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, | ||
1681 | state, &s, NULL, NULL)) | ||
1682 | return true; | ||
1683 | @@ -1460,6 +1830,7 @@ lex_identifier (cpp_reader *pfile, const | ||
1684 | const uchar *cur; | ||
1685 | unsigned int len; | ||
1686 | unsigned int hash = HT_HASHSTEP (0, *base); | ||
1687 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1688 | |||
1689 | cur = pfile->buffer->cur; | ||
1690 | if (! starts_ucn) | ||
1691 | @@ -1483,6 +1854,8 @@ lex_identifier (cpp_reader *pfile, const | ||
1692 | pfile->buffer->cur++; | ||
1693 | } | ||
1694 | } while (forms_identifier_p (pfile, false, nst)); | ||
1695 | + if (warn_bidi_p) | ||
1696 | + maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); | ||
1697 | result = _cpp_interpret_identifier (pfile, base, | ||
1698 | pfile->buffer->cur - base); | ||
1699 | *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); | ||
1700 | @@ -1719,6 +2092,7 @@ static void | ||
1701 | lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) | ||
1702 | { | ||
1703 | const uchar *pos = base; | ||
1704 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1705 | |||
1706 | /* 'tis a pity this information isn't passed down from the lexer's | ||
1707 | initial categorization of the token. */ | ||
1708 | @@ -1955,8 +2329,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t | ||
1709 | pos = base = pfile->buffer->cur; | ||
1710 | note = &pfile->buffer->notes[pfile->buffer->cur_note]; | ||
1711 | } | ||
1712 | + else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) | ||
1713 | + && warn_bidi_p) | ||
1714 | + maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), | ||
1715 | + /*ucn_p=*/false); | ||
1716 | } | ||
1717 | |||
1718 | + if (warn_bidi_p) | ||
1719 | + maybe_warn_bidi_on_close (pfile, pos); | ||
1720 | + | ||
1721 | if (CPP_OPTION (pfile, user_literals)) | ||
1722 | { | ||
1723 | /* If a string format macro, say from inttypes.h, is placed touching | ||
1724 | @@ -2051,15 +2432,27 @@ lex_string (cpp_reader *pfile, cpp_token | ||
1725 | else | ||
1726 | terminator = '>', type = CPP_HEADER_NAME; | ||
1727 | |||
1728 | + const bool warn_bidi_p = pfile->warn_bidi_p (); | ||
1729 | for (;;) | ||
1730 | { | ||
1731 | cppchar_t c = *cur++; | ||
1732 | |||
1733 | /* In #include-style directives, terminators are not escapable. */ | ||
1734 | if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') | ||
1735 | - cur++; | ||
1736 | + { | ||
1737 | + if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) | ||
1738 | + { | ||
1739 | + bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); | ||
1740 | + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); | ||
1741 | + } | ||
1742 | + cur++; | ||
1743 | + } | ||
1744 | else if (c == terminator) | ||
1745 | - break; | ||
1746 | + { | ||
1747 | + if (warn_bidi_p) | ||
1748 | + maybe_warn_bidi_on_close (pfile, cur - 1); | ||
1749 | + break; | ||
1750 | + } | ||
1751 | else if (c == '\n') | ||
1752 | { | ||
1753 | cur--; | ||
1754 | @@ -2076,6 +2469,11 @@ lex_string (cpp_reader *pfile, cpp_token | ||
1755 | } | ||
1756 | else if (c == '\0') | ||
1757 | saw_NUL = true; | ||
1758 | + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) | ||
1759 | + { | ||
1760 | + bidi::kind kind = get_bidi_utf8 (cur - 1); | ||
1761 | + maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); | ||
1762 | + } | ||
1763 | } | ||
1764 | |||
1765 | if (saw_NUL && !pfile->state.skipping) | ||