summaryrefslogtreecommitdiffstats
path: root/meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch')
-rw-r--r--meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch999
1 files changed, 999 insertions, 0 deletions
diff --git a/meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch b/meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch
new file mode 100644
index 0000000000..fd68461e32
--- /dev/null
+++ b/meta/recipes-support/aspell/aspell/CVE-2019-20433-0001.patch
@@ -0,0 +1,999 @@
1From de29341638833ba7717bd6b5e6850998454b044b Mon Sep 17 00:00:00 2001
2From: Kevin Atkinson <kevina@gnu.org>
3Date: Sat, 17 Aug 2019 17:06:53 -0400
4Subject: [PATCH 1/2] Don't allow null-terminated UCS-2/4 strings using the
5 original API.
6
7Detect if the encoding is UCS-2/4 and the length is -1 in affected API
8functions and refuse to convert the string. If the string ends up
9being converted somehow, abort with an error message in DecodeDirect
10and ConvDirect. To convert a null terminated string in
11Decode/ConvDirect, a negative number corresponding to the width of the
12underlying character type for the encoding is expected; for example,
13if the encoding is "ucs-2" then the size is expected to be -2.
14
15Also fix a 1-3 byte over-read in DecodeDirect when reading UCS-2/4
16strings when a size is provided (found by OSS-Fuzz).
17
18Also fix a bug in DecodeDirect that caused DocumentChecker to return
19the wrong offsets when working with UCS-2/4 strings.
20
21CVE: CVE-2019-20433
22Upstream-Status: Backport [https://github.com/GNUAspell/aspell/commit/de29341638833ba7717bd6b5e6850998454b044b]
23
24[SG: - adjusted context
25 - discarded test changes as test framework is not available
26 - discarded manual entry changes for features that aren't backported]
27Signed-off-by: Stefan Ghinea <stefan.ghinea@windriver.com>
28---
29 auto/MkSrc/CcHelper.pm | 99 ++++++++++++++++++++++++++++++++++---
30 auto/MkSrc/Create.pm | 5 +-
31 auto/MkSrc/Info.pm | 5 +-
32 auto/MkSrc/ProcCc.pm | 24 +++++----
33 auto/MkSrc/ProcImpl.pm | 57 +++++++++++++++------
34 auto/MkSrc/Read.pm | 4 +-
35 auto/mk-src.in | 44 +++++++++++++++--
36 common/convert.cpp | 39 ++++++++++++---
37 common/convert.hpp | 38 +++++++++++++-
38 common/document_checker.cpp | 17 ++++++-
39 common/document_checker.hpp | 1 +
40 common/version.cpp | 15 ++++--
41 configure.ac | 8 +++
42 manual/aspell.texi | 58 ++++++++++++++++------
43 manual/readme.texi | 70 +++++++++++++++++++++-----
44 15 files changed, 409 insertions(+), 75 deletions(-)
45
46diff --git a/auto/MkSrc/CcHelper.pm b/auto/MkSrc/CcHelper.pm
47index f2de991..0044335 100644
48--- a/auto/MkSrc/CcHelper.pm
49+++ b/auto/MkSrc/CcHelper.pm
50@@ -10,8 +10,8 @@ BEGIN {
51 use Exporter;
52 our @ISA = qw(Exporter);
53 our @EXPORT = qw(to_c_return_type c_error_cond
54- to_type_name make_desc make_func call_func
55- make_c_method call_c_method form_c_method
56+ to_type_name make_desc make_func call_func get_c_func_name
57+ make_c_method make_wide_macro call_c_method form_c_method
58 make_cxx_method);
59 }
60
61@@ -90,6 +90,69 @@ sub make_func ( $ \@ $ ; \% ) {
62 ')'));
63 }
64
65+=item make_wide_version NAME @TYPES PARMS ; %ACCUM
66+
67+Creates the wide character version of the function if needed
68+
69+=cut
70+
71+sub make_wide_version ( $ \@ $ ; \% ) {
72+ my ($name, $d, $p, $accum) = @_;
73+ my @d = @$d;
74+ shift @d;
75+ return '' unless grep {$_->{type} eq 'encoded string'} @d;
76+ $accum->{sys_headers}{'stddef.h'} = true;
77+ $accum->{suffix}[5] = <<'---';
78+
79+/******************* private implemantion details *********************/
80+
81+#ifdef __cplusplus
82+# define aspell_cast_(type, expr) (static_cast<type>(expr))
83+# define aspell_cast_from_wide_(str) (static_cast<const void *>(str))
84+#else
85+# define aspell_cast_(type, expr) ((type)(expr))
86+# define aspell_cast_from_wide_(str) ((const char *)(str))
87+#endif
88+---
89+ my @parms = map {$_->{type} eq 'encoded string'
90+ ? ($_->{name}, $_->{name}.'_size')
91+ : $_->{name}} @d;
92+ $name = to_lower $name;
93+ $accum->{suffix}[0] = <<'---';
94+/**********************************************************************/
95+
96+#ifdef ASPELL_ENCODE_SETTING_SECURE
97+---
98+ $accum->{suffix}[2] = "#endif\n";
99+ my @args = map {$_->{type} eq 'encoded string'
100+ ? ($_->{name}, "$_->{name}_size", '-1')
101+ : $_->{name}} @d;
102+ $accum->{suffix}[1] .=
103+ (join '',
104+ "#define $name",
105+ '(', join(', ', @parms), ')',
106+ "\\\n ",
107+ $name, '_wide',
108+ '(', join(', ', @args), ')',
109+ "\n");
110+ @args = map {$_->{type} eq 'encoded string'
111+ ? ("aspell_cast_from_wide_($_->{name})",
112+ "$_->{name}_size*aspell_cast_(int,sizeof(*($_->{name})))",
113+ "sizeof(*($_->{name}))")
114+ : $_->{name}} @d;
115+ return (join '',
116+ "\n",
117+ "/* version of $name that is safe to use with (null terminated) wide characters */\n",
118+ '#define ',
119+ $name, '_w',
120+ '(', join(', ', @parms), ')',
121+ "\\\n ",
122+ $name, '_wide',
123+ '(', join(', ', @args), ')',
124+ "\n");
125+}
126+
127+
128 =item call_func NAME @TYPES PARMS ; %ACCUM
129
130 Return a string to call a func. Will prefix the function with return
131@@ -103,7 +166,6 @@ Parms can be any of:
132
133 sub call_func ( $ \@ $ ; \% ) {
134 my ($name, $d, $p, $accum) = @_;
135- $accum = {} unless defined $accum;
136 my @d = @$d;
137 my $func_ret = to_type_name(shift @d, {%$p,pos=>'return'}, %$accum);
138 return (join '',
139@@ -148,8 +210,14 @@ sub to_type_name ( $ $ ; \% ) {
140 my $name = $t->{name};
141 my $type = $t->{type};
142
143- return ( (to_type_name {%$d, type=>'string'}, $p, %$accum) ,
144- (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum) )
145+ if ($name eq 'encoded string' && $is_cc && $pos eq 'parm') {
146+ my @types = ((to_type_name {%$d, type=>($p->{wide}?'const void pointer':'string')}, $p, %$accum),
147+ (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum));
148+ push @types, (to_type_name {%$d, type=>'int', name=>"$d->{name}_type_width"}, $p, %$accum) if $p->{wide};
149+ return @types;
150+ }
151+ return ( (to_type_name {%$d, type=>($p->{wide}?'const void pointer':'string')}, $p, %$accum) ,
152+ (to_type_name {%$d, type=>'int', name=>"$d->{name}_size"}, $p, %$accum) )
153 if $name eq 'encoded string' && $is_cc && $pos eq 'parm';
154
155 my $str;
156@@ -174,7 +242,7 @@ sub to_type_name ( $ $ ; \% ) {
157 $str .= "String";
158 }
159 } elsif ($name eq 'encoded string') {
160- $str .= "const char *";
161+ $str .= $p->{wide} ? "const void *" : "const char *";
162 } elsif ($name eq '') {
163 $str .= "void";
164 } elsif ($name eq 'bool' && $is_cc) {
165@@ -186,7 +254,7 @@ sub to_type_name ( $ $ ; \% ) {
166 if ($t->{pointer}) {
167 $accum->{types}->{$name} = $t;
168 } else {
169- $accum->{headers}->{$t->{created_in}} = true;
170+ $accum->{headers}->{$t->{created_in}} = true unless $mode eq 'cc';
171 }
172 $str .= "$c_type Aspell" if $mode eq 'cc';
173 $str .= to_mixed($name);
174@@ -214,6 +282,7 @@ sub to_type_name ( $ $ ; \% ) {
175 return $str;
176 }
177
178+
179 =item make_desc DESC ; LEVEL
180
181 Make a C comment out of DESC optionally indenting it LEVEL spaces.
182@@ -286,6 +355,7 @@ sub form_c_method ($ $ $ ; \% )
183 } else {
184 $func = "aspell $class $name";
185 }
186+ $func .= " wide" if $p->{wide};
187 if (exists $d->{'const'}) {
188 splice @data, 1, 0, {type => "const $class", name=> $this_name};
189 } else {
190@@ -306,6 +376,21 @@ sub make_c_method ($ $ $ ; \%)
191 return &make_func(@ret);
192 }
193
194+sub get_c_func_name ($ $ $)
195+{
196+ my @ret = &form_c_method(@_);
197+ return undef unless @ret > 0;
198+ return to_lower $ret[0];
199+}
200+
201+sub make_wide_macro ($ $ $ ; \%)
202+{
203+ my @ret = &form_c_method(@_);
204+ return undef unless @ret > 0;
205+ my $str = &make_wide_version(@ret);
206+ return $str;
207+}
208+
209 sub call_c_method ($ $ $ ; \%)
210 {
211 my @ret = &form_c_method(@_);
212diff --git a/auto/MkSrc/Create.pm b/auto/MkSrc/Create.pm
213index d39b60e..630ede5 100644
214--- a/auto/MkSrc/Create.pm
215+++ b/auto/MkSrc/Create.pm
216@@ -77,8 +77,10 @@ sub create_cc_file ( % ) {
217 $file .= "#include \"aspell.h\"\n" if $p{type} eq 'cxx';
218 $file .= "#include \"settings.h\"\n" if $p{type} eq 'native_impl' && $p{name} eq 'errors';
219 $file .= "#include \"gettext.h\"\n" if $p{type} eq 'native_impl' && $p{name} eq 'errors';
220+ $file .= cmap {"#include <$_>\n"} sort keys %{$accum{sys_headers}};
221 $file .= cmap {"#include \"".to_lower($_).".hpp\"\n"} sort keys %{$accum{headers}};
222- $file .= "#ifdef __cplusplus\nextern \"C\" {\n#endif\n" if $p{header} && !$p{cxx};
223+ $file .= "\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n" if $p{header} && !$p{cxx};
224+ $file .= join('', grep {defined $_} @{$accum{prefix}});
225 $file .= "\nnamespace $p{namespace} {\n\n" if $p{cxx};
226 if (defined $info{forward}{proc}{$p{type}}) {
227 my @types = sort {$a->{name} cmp $b->{name}} (values %{$accum{types}});
228@@ -86,6 +88,7 @@ sub create_cc_file ( % ) {
229 }
230 $file .= "\n";
231 $file .= $body;
232+ $file .= join('', grep {defined $_} @{$accum{suffix}});
233 $file .= "\n\n}\n\n" if $p{cxx};
234 $file .= "#ifdef __cplusplus\n}\n#endif\n" if $p{header} && !$p{cxx};
235 $file .= "#endif /* $hm */\n" if $p{header};
236diff --git a/auto/MkSrc/Info.pm b/auto/MkSrc/Info.pm
237index c644028..ace8e21 100644
238--- a/auto/MkSrc/Info.pm
239+++ b/auto/MkSrc/Info.pm
240@@ -60,6 +60,7 @@ each proc sub should take the following argv
241 the object from which it is a member of
242 no native: do not attempt to create a native implementation
243 treat as object: treat as a object rather than a pointer
244+ no conv: do not convert an encoded string
245
246 The %info structure is initialized as follows:
247
248@@ -104,8 +105,8 @@ The %info structure is initialized as follows:
249 errors => {}, # possible errors
250 method => {
251 # A class method
252- options => ['desc', 'posib err', 'c func', 'const',
253- 'c only', 'c impl', 'cxx impl'],
254+ options => ['desc', 'posib err', 'c func', 'const', 'no conv', 'on conv error',
255+ 'c only', 'c impl', 'cxx impl', 'cc extra'],
256 groups => undef},
257 constructor => {
258 # A class constructor
259diff --git a/auto/MkSrc/ProcCc.pm b/auto/MkSrc/ProcCc.pm
260index 47c4338..98cc435 100644
261--- a/auto/MkSrc/ProcCc.pm
262+++ b/auto/MkSrc/ProcCc.pm
263@@ -23,7 +23,7 @@ use MkSrc::Info;
264 sub make_c_object ( $ @ );
265
266 $info{group}{proc}{cc} = sub {
267- my ($data) = @_;
268+ my ($data,@rest) = @_;
269 my $ret;
270 my $stars = (70 - length $data->{name})/2;
271 $ret .= "/";
272@@ -33,14 +33,14 @@ $info{group}{proc}{cc} = sub {
273 $ret .= "/\n";
274 foreach my $d (@{$data->{data}}) {
275 $ret .= "\n\n";
276- $ret .= $info{$d->{type}}{proc}{cc}->($d);
277+ $ret .= $info{$d->{type}}{proc}{cc}->($d,@rest);
278 }
279 $ret .= "\n\n";
280 return $ret;
281 };
282
283 $info{enum}{proc}{cc} = sub {
284- my ($d) = @_;
285+ my ($d,@rest) = @_;
286 my $n = "Aspell".to_mixed($d->{name});
287 return ("\n".
288 make_desc($d->{desc}).
289@@ -58,21 +58,26 @@ $info{struct}{proc}{cc} = sub {
290 };
291
292 $info{union}{proc}{cc} = sub {
293- return make_c_object "union", $_[0];
294+ return make_c_object "union", @_;
295 };
296
297 $info{class}{proc}{cc} = sub {
298- my ($d) = @_;
299+ my ($d,$accum) = @_;
300 my $class = $d->{name};
301 my $classname = "Aspell".to_mixed($class);
302 my $ret = "";
303 $ret .= "typedef struct $classname $classname;\n\n";
304 foreach (@{$d->{data}}) {
305- my $s = make_c_method($class, $_, {mode=>'cc'});
306+ my $s = make_c_method($class, $_, {mode=>'cc'}, %$accum);
307 next unless defined $s;
308 $ret .= "\n";
309 $ret .= make_desc($_->{desc});
310- $ret .= make_c_method($class, $_, {mode=>'cc'}).";\n";
311+ $ret .= make_c_method($class, $_, {mode=>'cc'}, %$accum).";\n";
312+ if (grep {$_->{type} eq 'encoded string'} @{$_->{data}}) {
313+ $ret .= make_c_method($class, $_, {mode=>'cc', wide=>true}, %$accum).";\n";
314+ $ret .= make_wide_macro($class, $_, {mode=>'cc'}, %$accum);
315+ }
316+ $ret .= "\n".$_->{'cc extra'}."\n" if defined $_->{'cc extra'};
317 }
318 $ret .= "\n";
319 return $ret;
320@@ -105,7 +110,8 @@ $info{errors}{proc}{cc} = sub {
321 };
322
323 sub make_c_object ( $ @ ) {
324- my ($t, $d) = @_;
325+ my ($t, $d, $accum) = @_;
326+ $accum = {} unless defined $accum;
327 my $struct;
328 $struct .= "Aspell";
329 $struct .= to_mixed($d->{name});
330@@ -120,7 +126,7 @@ sub make_c_object ( $ @ ) {
331 "\n};\n"),
332 "typedef $t $struct $struct;",
333 join ("\n",
334- map {make_c_method($d->{name}, $_, {mode=>'cc'}).";"}
335+ map {make_c_method($d->{name}, $_, {mode=>'cc'}, %$accum).";"}
336 grep {$_->{type} eq 'method'}
337 @{$d->{data}})
338 )."\n";
339diff --git a/auto/MkSrc/ProcImpl.pm b/auto/MkSrc/ProcImpl.pm
340index b8628fd..3d0f220 100644
341--- a/auto/MkSrc/ProcImpl.pm
342+++ b/auto/MkSrc/ProcImpl.pm
343@@ -45,10 +45,13 @@ $info{class}{proc}{impl} = sub {
344 foreach (grep {$_ ne ''} split /\s*,\s*/, $data->{'c impl headers'}) {
345 $accum->{headers}{$_} = true;
346 }
347- foreach my $d (@{$data->{data}}) {
348+ my @d = @{$data->{data}};
349+ while (@d) {
350+ my $d = shift @d;
351+ my $need_wide = false;
352 next unless one_of $d->{type}, qw(method constructor destructor);
353 my @parms = @{$d->{data}} if exists $d->{data};
354- my $m = make_c_method $data->{name}, $d, {mode=>'cc_cxx', use_name=>true}, %$accum;
355+ my $m = make_c_method $data->{name}, $d, {mode=>'cc_cxx', use_name=>true, wide=>$d->{wide}}, %$accum;
356 next unless defined $m;
357 $ret .= "extern \"C\" $m\n";
358 $ret .= "{\n";
359@@ -57,24 +60,49 @@ $info{class}{proc}{impl} = sub {
360 } else {
361 if ($d->{type} eq 'method') {
362 my $ret_type = shift @parms;
363- my $ret_native = to_type_name $ret_type, {mode=>'native_no_err', pos=>'return'}, %$accum;
364+ my $ret_native = to_type_name $ret_type, {mode=>'native_no_err', pos=>'return', wide=>$d->{wide}}, %$accum;
365 my $snum = 0;
366+ my $call_fun = $d->{name};
367+ my @call_parms;
368 foreach (@parms) {
369 my $n = to_lower($_->{name});
370- if ($_->{type} eq 'encoded string') {
371- $accum->{headers}{'mutable string'} = true;
372- $accum->{headers}{'convert'} = true;
373- $ret .= " ths->temp_str_$snum.clear();\n";
374- $ret .= " ths->to_internal_->convert($n, ${n}_size, ths->temp_str_$snum);\n";
375- $ret .= " unsigned int s$snum = ths->temp_str_$snum.size();\n";
376- $_ = "MutableString(ths->temp_str_$snum.mstr(), s$snum)";
377- $snum++;
378+ if ($_->{type} eq 'encoded string' && !exists($d->{'no conv'})) {
379+ $need_wide = true unless $d->{wide};
380+ die unless exists $d->{'posib err'};
381+ $accum->{headers}{'mutable string'} = true;
382+ $accum->{headers}{'convert'} = true;
383+ my $name = get_c_func_name $data->{name}, $d, {mode=>'cc_cxx', use_name=>true, wide=>$d->{wide}};
384+ $ret .= " ths->temp_str_$snum.clear();\n";
385+ if ($d->{wide}) {
386+ $ret .= " ${n}_size = get_correct_size(\"$name\", ths->to_internal_->in_type_width(), ${n}_size, ${n}_type_width);\n";
387+ } else {
388+ $ret .= " PosibErr<int> ${n}_fixed_size = get_correct_size(\"$name\", ths->to_internal_->in_type_width(), ${n}_size);\n";
389+ if (exists($d->{'on conv error'})) {
390+ $ret .= " if (${n}_fixed_size.get_err()) {\n";
391+ $ret .= " ".$d->{'on conv error'}."\n";
392+ $ret .= " } else {\n";
393+ $ret .= " ${n}_size = ${n}_fixed_size;\n";
394+ $ret .= " }\n";
395+ } else {
396+ $ret .= " ths->err_.reset(${n}_fixed_size.release_err());\n";
397+ $ret .= " if (ths->err_ != 0) return ".(c_error_cond $ret_type).";\n";
398+ }
399+ }
400+ $ret .= " ths->to_internal_->convert($n, ${n}_size, ths->temp_str_$snum);\n";
401+ $ret .= " unsigned int s$snum = ths->temp_str_$snum.size();\n";
402+ push @call_parms, "MutableString(ths->temp_str_$snum.mstr(), s$snum)";
403+ $snum++;
404+ } elsif ($_->{type} eq 'encoded string') {
405+ $need_wide = true unless $d->{wide};
406+ push @call_parms, $n, "${n}_size";
407+ push @call_parms, "${n}_type_width" if $d->{wide};
408+ $call_fun .= " wide" if $d->{wide};
409 } else {
410- $_ = $n;
411+ push @call_parms, $n;
412 }
413 }
414- my $parms = '('.(join ', ', @parms).')';
415- my $exp = "ths->".to_lower($d->{name})."$parms";
416+ my $parms = '('.(join ', ', @call_parms).')';
417+ my $exp = "ths->".to_lower($call_fun)."$parms";
418 if (exists $d->{'posib err'}) {
419 $accum->{headers}{'posib err'} = true;
420 $ret .= " PosibErr<$ret_native> ret = $exp;\n";
421@@ -118,6 +146,7 @@ $info{class}{proc}{impl} = sub {
422 }
423 }
424 $ret .= "}\n\n";
425+ unshift @d,{%$d, wide=>true} if $need_wide;
426 }
427 return $ret;
428 };
429diff --git a/auto/MkSrc/Read.pm b/auto/MkSrc/Read.pm
430index 4b3d1d0..4bf640e 100644
431--- a/auto/MkSrc/Read.pm
432+++ b/auto/MkSrc/Read.pm
433@@ -88,13 +88,13 @@ sub advance ( ) {
434 $in_pod = $1 if $line =~ /^\=(\w+)/;
435 $line = '' if $in_pod;
436 $in_pod = undef if $in_pod && $in_pod eq 'cut';
437- $line =~ s/\#.*$//;
438+ $line =~ s/(?<!\\)\#.*$//;
439 $line =~ s/^(\t*)//;
440 $level = $base_level + length($1);
441 $line =~ s/\s*$//;
442 ++$base_level if $line =~ s/^\{$//;
443 --$base_level if $line =~ s/^\}$//;
444- $line =~ s/\\([{}])/$1/g;
445+ $line =~ s/\\([{}#\\])/$1/g;
446 } while ($line eq '');
447 #print "$level:$line\n";
448 }
449diff --git a/auto/mk-src.in b/auto/mk-src.in
450index 0e7833a..eb3353f 100644
451--- a/auto/mk-src.in
452+++ b/auto/mk-src.in
453@@ -608,6 +608,7 @@ errors:
454 invalid expression
455 mesg => "%expression" is not a valid regular expression.
456 parms => expression
457+
458 }
459 group: speller
460 {
461@@ -650,6 +651,7 @@ class: speller
462 posib err
463 desc => Returns 0 if it is not in the dictionary,
464 1 if it is, or -1 on error.
465+ on conv error => return 0;
466 /
467 bool
468 encoded string: word
469@@ -715,6 +717,8 @@ class: speller
470 desc => Return NULL on error.
471 The word list returned by suggest is only
472 valid until the next call to suggest.
473+ on conv error =>
474+ word = NULL; word_size = 0;
475 /
476 const word list
477 encoded string: word
478@@ -840,7 +844,6 @@ class: document checker
479 void
480
481 method: process
482-
483 desc => Process a string.
484 The string passed in should only be split on
485 white space characters. Furthermore, between
486@@ -849,10 +852,10 @@ class: document checker
487 in the document. Passing in strings out of
488 order, skipping strings or passing them in
489 more than once may lead to undefined results.
490+ no conv
491 /
492 void
493- string: str
494- int: size
495+ encoded string: str
496
497 method: next misspelling
498
499@@ -860,9 +863,23 @@ class: document checker
500 processed string. If there are no more
501 misspelled words, then token.word will be
502 NULL and token.size will be 0
503+ cc extra =>
504+ \#define aspell_document_checker_next_misspelling_w(type, ths) \\
505+ aspell_document_checker_next_misspelling_adj(ths, sizeof(type))
506 /
507 token object
508
509+ method: next misspelling adj
510+ desc => internal: do not use
511+ c impl =>
512+ Token res = ths->next_misspelling();
513+ res.offset /= type_width;
514+ res.len /= type_width;
515+ return res;
516+ /
517+ token object
518+ int: type_width
519+
520 method: filter
521
522 desc => Returns the underlying filter class.
523@@ -922,9 +939,30 @@ class: string enumeration
524 ths->from_internal_->append_null(ths->temp_str);
525 return ths->temp_str.data();
526 \}
527+ cc extra =>
528+ \#define aspell_string_enumeration_next_w(type, ths) \\
529+ aspell_cast_(const type *, aspell_string_enumeration_next_wide(ths, sizeof(type)))
530 /
531 const string
532
533+ method: next wide
534+ c impl =>
535+ const char * s = ths->next();
536+ if (s == 0) {
537+ return s;
538+ } else if (ths->from_internal_ == 0) \{
539+ assert(type_width == 1);
540+ return s;
541+ \} else \{
542+ assert(type_width == ths->from_internal_->out_type_width());
543+ ths->temp_str.clear();
544+ ths->from_internal_->convert(s,-1,ths->temp_str);
545+ ths->from_internal_->append_null(ths->temp_str);
546+ return ths->temp_str.data();
547+ \}
548+ /
549+ const void pointer
550+ int: type_width
551 }
552 group: info
553 {
554diff --git a/common/convert.cpp b/common/convert.cpp
555index 1add95a..7ae0317 100644
556--- a/common/convert.cpp
557+++ b/common/convert.cpp
558@@ -541,18 +541,25 @@ namespace acommon {
559 // Trivial Conversion
560 //
561
562+ const char * unsupported_null_term_wide_string_msg =
563+ "Null-terminated wide-character strings unsupported when used this way.";
564+
565 template <typename Chr>
566 struct DecodeDirect : public Decode
567 {
568+ DecodeDirect() {type_width = sizeof(Chr);}
569 void decode(const char * in0, int size, FilterCharVector & out) const {
570 const Chr * in = reinterpret_cast<const Chr *>(in0);
571- if (size == -1) {
572+ if (size == -sizeof(Chr)) {
573 for (;*in; ++in)
574- out.append(*in);
575+ out.append(*in, sizeof(Chr));
576+ } else if (size <= -1) {
577+ fprintf(stderr, "%s\n", unsupported_null_term_wide_string_msg);
578+ abort();
579 } else {
580- const Chr * stop = reinterpret_cast<const Chr *>(in0 +size);
581+ const Chr * stop = reinterpret_cast<const Chr *>(in0) + size/sizeof(Chr);
582 for (;in != stop; ++in)
583- out.append(*in);
584+ out.append(*in, sizeof(Chr));
585 }
586 }
587 PosibErr<void> decode_ec(const char * in0, int size,
588@@ -565,6 +572,7 @@ namespace acommon {
589 template <typename Chr>
590 struct EncodeDirect : public Encode
591 {
592+ EncodeDirect() {type_width = sizeof(Chr);}
593 void encode(const FilterChar * in, const FilterChar * stop,
594 CharVector & out) const {
595 for (; in != stop; ++in) {
596@@ -594,11 +602,15 @@ namespace acommon {
597 template <typename Chr>
598 struct ConvDirect : public DirectConv
599 {
600+ ConvDirect() {type_width = sizeof(Chr);}
601 void convert(const char * in0, int size, CharVector & out) const {
602- if (size == -1) {
603+ if (size == -sizeof(Chr)) {
604 const Chr * in = reinterpret_cast<const Chr *>(in0);
605 for (;*in != 0; ++in)
606 out.append(in, sizeof(Chr));
607+ } else if (size <= -1) {
608+ fprintf(stderr, "%s\n", unsupported_null_term_wide_string_msg);
609+ abort();
610 } else {
611 out.append(in0, size);
612 }
613@@ -1121,5 +1133,20 @@ namespace acommon {
614 }
615 return 0;
616 }
617-
618+
619+ PosibErr<void> unsupported_null_term_wide_string_err_(const char * func) {
620+ static bool reported_to_stderr = false;
621+ PosibErr<void> err = make_err(other_error, unsupported_null_term_wide_string_msg);
622+ if (!reported_to_stderr) {
623+ CERR.printf("ERROR: %s: %s\n", func, unsupported_null_term_wide_string_msg);
624+ reported_to_stderr = true;
625+ }
626+ return err;
627+ }
628+
629+ void unsupported_null_term_wide_string_abort_(const char * func) {
630+ CERR.printf("%s: %s\n", func, unsupported_null_term_wide_string_msg); // both %s need an argument: API function name, then message
631+ abort();
632+ }
633+
634 }
635diff --git a/common/convert.hpp b/common/convert.hpp
636index 76332ee..c948973 100644
637--- a/common/convert.hpp
638+++ b/common/convert.hpp
639@@ -7,6 +7,8 @@
640 #ifndef ASPELL_CONVERT__HPP
641 #define ASPELL_CONVERT__HPP
642
643+#include "settings.h"
644+
645 #include "string.hpp"
646 #include "posib_err.hpp"
647 #include "char_vector.hpp"
648@@ -25,8 +27,9 @@ namespace acommon {
649 typedef const Config CacheConfig;
650 typedef const char * CacheKey;
651 String key;
652+ int type_width; // type width in bytes
653 bool cache_key_eq(const char * l) const {return key == l;}
654- ConvBase() {}
655+ ConvBase() : type_width(1) {}
656 private:
657 ConvBase(const ConvBase &);
658 void operator=(const ConvBase &);
659@@ -56,6 +59,8 @@ namespace acommon {
660 virtual ~Encode() {}
661 };
662 struct DirectConv { // convert directly from in_code to out_code.
663+ int type_width; // type width in bytes
664+ DirectConv() : type_width(1) {}
665 // should not take ownership of decode and encode.
666 // decode and encode guaranteed to stick around for the life
667 // of the object.
668@@ -126,6 +131,9 @@ namespace acommon {
669 const char * in_code() const {return decode_->key.c_str();}
670 const char * out_code() const {return encode_->key.c_str();}
671
672+ int in_type_width() const {return decode_->type_width;}
673+ int out_type_width() const {return encode_->type_width;}
674+
675 void append_null(CharVector & out) const
676 {
677 const char nul[4] = {0,0,0,0}; // 4 should be enough
678@@ -191,6 +199,10 @@ namespace acommon {
679 }
680 }
681
682+ void convert(const void * in, int size, CharVector & out) {
683+ convert(static_cast<const char *>(in), size, out);
684+ }
685+
686 void generic_convert(const char * in, int size, CharVector & out);
687
688 };
689@@ -412,6 +424,30 @@ namespace acommon {
690 return operator()(str, str + byte_size);}
691 };
692
693+#ifdef SLOPPY_NULL_TERM_STRINGS
694+ static const bool sloppy_null_term_strings = true;
695+#else
696+ static const bool sloppy_null_term_strings = false;
697+#endif
698+
699+ PosibErr<void> unsupported_null_term_wide_string_err_(const char * func);
700+ void unsupported_null_term_wide_string_abort_(const char * func);
701+
702+ static inline PosibErr<int> get_correct_size(const char * func, int conv_type_width, int size) {
703+ if (sloppy_null_term_strings && size <= -1)
704+ return -conv_type_width;
705+ if (size <= -1 && -conv_type_width != size)
706+ return unsupported_null_term_wide_string_err_(func);
707+ return size;
708+ }
709+ static inline int get_correct_size(const char * func, int conv_type_width, int size, int type_width) {
710+ if ((sloppy_null_term_strings || type_width <= -1) && size <= -1)
711+ return -conv_type_width;
712+ if (size <= -1 && conv_type_width != type_width)
713+ unsupported_null_term_wide_string_abort_(func);
714+ return size;
715+ }
716+
717 }
718
719 #endif
720diff --git a/common/document_checker.cpp b/common/document_checker.cpp
721index 5e510c4..0ccf1cd 100644
722--- a/common/document_checker.cpp
723+++ b/common/document_checker.cpp
724@@ -44,7 +44,9 @@ namespace acommon {
725 void DocumentChecker::process(const char * str, int size)
726 {
727 proc_str_.clear();
728- conv_->decode(str, size, proc_str_);
729+ PosibErr<int> fixed_size = get_correct_size("aspell_document_checker_process", conv_->in_type_width(), size);
730+ if (!fixed_size.has_err())
731+ conv_->decode(str, fixed_size, proc_str_);
732 proc_str_.append(0);
733 FilterChar * begin = proc_str_.pbegin();
734 FilterChar * end = proc_str_.pend() - 1;
735@@ -53,6 +55,19 @@ namespace acommon {
736 tokenizer_->reset(begin, end);
737 }
738
739+ void DocumentChecker::process_wide(const void * str, int size, int type_width)
740+ {
741+ proc_str_.clear();
742+ int fixed_size = get_correct_size("aspell_document_checker_process", conv_->in_type_width(), size, type_width);
743+ conv_->decode(static_cast<const char *>(str), fixed_size, proc_str_);
744+ proc_str_.append(0);
745+ FilterChar * begin = proc_str_.pbegin();
746+ FilterChar * end = proc_str_.pend() - 1;
747+ if (filter_)
748+ filter_->process(begin, end);
749+ tokenizer_->reset(begin, end);
750+ }
751+
752 Token DocumentChecker::next_misspelling()
753 {
754 bool correct;
755diff --git a/common/document_checker.hpp b/common/document_checker.hpp
756index d35bb88..11a3c73 100644
757--- a/common/document_checker.hpp
758+++ b/common/document_checker.hpp
759@@ -36,6 +36,7 @@ namespace acommon {
760 PosibErr<void> setup(Tokenizer *, Speller *, Filter *);
761 void reset();
762 void process(const char * str, int size);
763+ void process_wide(const void * str, int size, int type_width);
764 Token next_misspelling();
765
766 Filter * filter() {return filter_;}
767diff --git a/common/version.cpp b/common/version.cpp
768index 414d938..9e60b75 100644
769--- a/common/version.cpp
770+++ b/common/version.cpp
771@@ -1,8 +1,17 @@
772 #include "settings.h"
773
774-extern "C" const char * aspell_version_string() {
775 #ifdef NDEBUG
776- return VERSION " NDEBUG";
777+# define NDEBUG_STR " NDEBUG"
778+#else
779+# define NDEBUG_STR
780+#endif
781+
782+#ifdef SLOPPY_NULL_TERM_STRINGS
783+# define SLOPPY_STR " SLOPPY"
784+#else
785+# define SLOPPY_STR
786 #endif
787- return VERSION;
788+
789+extern "C" const char * aspell_version_string() {
790+ return VERSION NDEBUG_STR SLOPPY_STR;
791 }
792diff --git a/configure.ac b/configure.ac
793index 60e3b39..a5d51e3 100644
794--- a/configure.ac
795+++ b/configure.ac
796@@ -73,6 +73,9 @@ AC_ARG_ENABLE(filter-version-control,
797 AC_ARG_ENABLE(32-bit-hash-fun,
798 AS_HELP_STRING([--enable-32-bit-hash-fun],[use 32-bit hash function for compiled dictionaries]))
799
800+AC_ARG_ENABLE(sloppy-null-term-strings,
801+ AS_HELP_STRING([--enable-sloppy-null-term-strings],[allows allow null terminated UCS-2 and UCS-4 strings]))
802+
803 AC_ARG_ENABLE(pspell-compatibility,
804 AS_HELP_STRING([--disable-pspell-compatibility],[don't install pspell compatibility libraries]))
805
806@@ -141,6 +144,11 @@ then
807 AC_DEFINE(USE_32_BIT_HASH_FUN, 1, [Defined if 32-bit hash function should be used for compiled dictionaries.])
808 fi
809
810+if test "$enable_sloppy_null_term_strings" = "yes"
811+then
812+ AC_DEFINE(SLOPPY_NULL_TERM_STRINGS, 1, [Defined if null-terminated UCS-2 and UCS-4 strings should always be allowed.])
813+fi
814+
815 AM_CONDITIONAL(PSPELL_COMPATIBILITY,
816 [test "$enable_pspell_compatibility" != "no"])
817 AM_CONDITIONAL(INCREMENTED_SONAME,
818diff --git a/manual/aspell.texi b/manual/aspell.texi
819index 45fa091..f400e06 100644
820--- a/manual/aspell.texi
821+++ b/manual/aspell.texi
822@@ -158,7 +158,8 @@ Installing
823
824 * Generic Install Instructions::
825 * HTML Manuals and "make clean"::
826-* Curses Notes::
827+* Curses Notes::
828+* Upgrading from Aspell 0.60.7::
829 * Loadable Filter Notes::
830 * Upgrading from Aspell 0.50::
831 * Upgrading from Aspell .33/Pspell .12::
832@@ -2206,18 +2207,26 @@ int correct = aspell_speller_check(spell_checker, @var{word}, @var{size});
833 @end smallexample
834
835 @noindent
836-@var{word} is expected to be a @code{const char *} character
837-string. If the encoding is set to be @code{ucs-2} or
838-@code{ucs-4} @var{word} is expected to be a cast
839-from either @code{const u16int *} or @code{const u32int *}
840-respectively. @code{u16int} and @code{u32int} are generally
841-@code{unsigned short} and @code{unsigned int} respectively.
842-@var{size} is the length of the string or @code{-1} if the string
843-is null terminated. If the string is a cast from @code{const u16int
844-*} or @code{const u32int *} then @code{@i{size}} is the amount of
845-space in bytes the string takes up after being cast to @code{const
846-char *} and not the true size of the string. @code{sspell_speller_check}
847-will return @code{0} if it is not found and non-zero otherwise.
848+@var{word} is expected to be a @code{const char *} character string.
849+@var{size} is the length of the string or @code{-1} if the string is
850+null terminated. @code{aspell_speller_check} will return @code{0} if it is not found
851+and non-zero otherwise.
852+
853+If you are using the @code{ucs-2} or @code{ucs-4} encoding then the
854+string is expected to be either a 2 or 4 byte wide integer
855+(respectively) and the @code{_w} macro version should be used:
856+
857+@smallexample
858+int correct = aspell_speller_check_w(spell_checker, @var{word}, @var{size});
859+@end smallexample
860+
861+The macro will cast the string to the correct type and convert
862+@var{size} into bytes for you and then call the special wide version of the
863+function that will make sure the encoding is correct for the type
864+passed in. For compatibility with older versions of Aspell the normal
865+non-wide functions can still be used provided that the size of the
866+string, in bytes, is also passed in. Null terminated @code{ucs-2} or
867+@code{ucs-4} are no longer supported when using the non-wide functions.
868
869 If the word is not correct, then the @code{suggest} method can be used
870 to come up with likely replacements.
871@@ -2236,7 +2245,28 @@ delete_aspell_string_enumeration(elements);
872
873 Notice how @code{elements} is deleted but @code{suggestions} is not.
874 The value returned by @code{suggestions} is only valid to the next
875-call to @code{suggest}. Once a replacement is made the
876+call to @code{suggest}.
877+
878+If you are using the @code{ucs-2} or @code{ucs-4} encoding then, in
879+addition to using the @code{_w} macro for the @code{suggest} method, you
880+should also use the @code{_w} macro with the @code{next} method which
881+will cast the string to the correct type for you. For example, if you
882+are using the @code{ucs-2} encoding and the string is a @code{const
883+uint16_t *} then you should use:
884+
885+@smallexample
886+AspellWordList * suggestions = aspell_speller_suggest_w(spell_checker,
887+ @var{word}, @var{size});
888+AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
889+const uint16_t * word;
890+while ( (word = aspell_string_enumeration_next_w(uint16_t, elements)) != NULL )
891+@{
892+ // add to suggestion list
893+@}
894+delete_aspell_string_enumeration(elements);
895+@end smallexample
896+
897+Once a replacement is made the
898 @code{store_repl} method should be used to communicate the replacement
899 pair back to the spell checker (for the reason, @pxref{Notes on
900 Storing Replacement Pairs}). Its usage is as follows:
901diff --git a/manual/readme.texi b/manual/readme.texi
902index 669ab8e..531721f 100644
903--- a/manual/readme.texi
904+++ b/manual/readme.texi
905@@ -15,15 +15,16 @@ The latest version can always be found at GNU Aspell's home page at
906 @uref{http://aspell.net}.
907
908 @menu
909-* Generic Install Instructions::
910-* HTML Manuals and "make clean"::
911-* Curses Notes::
912-* Loadable Filter Notes::
913-* Using 32-Bit Dictionaries on a 64-Bit System::
914-* Upgrading from Aspell 0.50::
915-* Upgrading from Aspell .33/Pspell .12::
916-* Upgrading from a Pre-0.50 snapshot::
917-* WIN32 Notes::
918+* Generic Install Instructions::
919+* HTML Manuals and "make clean"::
920+* Curses Notes::
921+* Upgrading from Aspell 0.60.7::
922+* Loadable Filter Notes::
923+* Using 32-Bit Dictionaries on a 64-Bit System::
924+* Upgrading from Aspell 0.50::
925+* Upgrading from Aspell .33/Pspell .12::
926+* Upgrading from a Pre-0.50 snapshot::
927+* WIN32 Notes::
928 @end menu
929
930 @node Generic Install Instructions
931@@ -121,17 +122,62 @@ In addition your system must also support the @code{mblen} function.
932 Although this function was defined in the ISO C89 standard (ANSI
933 X3.159-1989), not all systems have it.
934
935+@node Upgrading from Aspell 0.60.7
936+@appendixsec Upgrading from Aspell 0.60.7
937+
938+To prevent a potentially unbounded buffer over-read, Aspell no longer
939+supports null-terminated UCS-2 and UCS-4 encoded strings with the
940+original C API. Null-terminated 8-bit or UTF-8 encoded strings are
941+still supported, as are UCS-2 and UCS-4 encoded strings when the
942+length is passed in.
943+
944+As of Aspell 0.60.8 a function from the original API that expects an
945+encoded string as a parameter will return meaningless results (or an
946+error code) if the string is null terminated and the encoding is set to
947+@code{ucs-2} or @code{ucs-4}. In addition, a single:
948+@example
949+ERROR: aspell_speller_check: Null-terminated wide-character strings unsupported when used this way.
950+@end example
951+will be printed to standard error the first time one of those
952+functions is called.
953+
954+Applications that use null-terminated UCS-2/4 strings should either (1)
955+use the interface intended for working with wide-characters
956+(@pxref{Through the C API}); or (2) define
957+@code{ASPELL_ENCODE_SETTING_SECURE} before including @code{aspell.h}.
958+In the latter case it is important that the application explicitly
959+sets the encoding to a known value. Defining
960+@code{ASPELL_ENCODE_SETTING_SECURE} and not setting the encoding
961+explicitly or allowing users of the application to set the encoding
962+could result in an unbounded buffer over-read.
963+
964+If it is necessary to preserve binary compatibility with older
965+versions of Aspell, the easiest thing would be to determine the length
966+of the UCS-2/4 string---in bytes---and pass that in. Due to an
967+implementation detail, existing API functions can be made to work with
968+null-terminated UCS-2/4 strings safely by passing in either @code{-2}
969+or @code{-4} (corresponding to the width of the character type) as the
970+size. Doing so, however, will cause a buffer over-read for unpatched
971+versions of Aspell. To avoid this it will be necessary to parse the
972+version string to determine the correct value to use. However, no
973+official support will be provided for the latter method.
974+
975+If the application can not be recompiled, then Aspell can be configured
976+to preserve the old behavior by passing
977+@option{--enable-sloppy-null-term-strings} to @command{configure}. When Aspell
978+is compiled this way the version string will include the string
979+@samp{ SLOPPY}.
980+
981 @node Loadable Filter Notes
982 @appendixsec Loadable Filter Notes
983-
984+
985 Support for being able to load additional filter modules at run-time
986 has only been verified to work on Linux platforms. If you get linker
987 errors when trying to use a filter, then it is likely that loadable
988 filter support is not working yet on your platform. Thus, in order to
989 get Aspell to work correctly you will need to avoid compiling the
990 filters as individual modules by using the
991-@option{--enable-compile-in-filters} when configuring Aspell with
992-@command{./configure}.
993+@option{--enable-compile-in-filters} @command{configure} option.
994
995 @node Using 32-Bit Dictionaries on a 64-Bit System
996 @appendixsec Using 32-Bit Dictionaries on a 64-Bit System
997--
9982.17.1
999