summaryrefslogtreecommitdiffstats
path: root/meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch')
-rw-r--r--meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch4051
1 files changed, 4051 insertions, 0 deletions
diff --git a/meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch b/meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch
new file mode 100644
index 0000000000..653722348a
--- /dev/null
+++ b/meta/recipes-core/coreutils/coreutils-6.9/coreutils-i18n.patch
@@ -0,0 +1,4051 @@
1Upstream-Status: Inappropriate [legacy version]
2
3This patch was imported from the Fedora Core 8 coreutils-6.9-9 package.
4
5The package is stated as being Licensed as GPLv2+.
6
7The comment indicates that the purpose is li18nux/lsb compliance.
8
9Signed-off-by: Mark Hatle <mark.hatle@windriver.com>
10
11--- /dev/null 2007-03-01 09:16:39.219409909 +0000
12+++ coreutils-6.8+/tests/sort/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
13@@ -0,0 +1,58 @@
14+#! /bin/sh
15+case $# in
16+ 0) xx='../../src/sort';;
17+ *) xx="$1";;
18+esac
19+test "$VERBOSE" && echo=echo || echo=:
20+$echo testing program: $xx
21+errors=0
22+test "$srcdir" || srcdir=.
23+test "$VERBOSE" && $xx --version 2> /dev/null
24+
25+export LC_ALL=en_US.UTF-8
26+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
27+errors=0
28+
29+$xx -t @ -k2 -n mb1.I > mb1.O
30+code=$?
31+if test $code != 0; then
32+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
33+ errors=`expr $errors + 1`
34+else
35+ cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
36+ case $? in
37+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
38+ 1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
39+ (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
40+ errors=`expr $errors + 1`;;
41+ 2) $echo "Test mb1 may have failed." 1>&2
42+ $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
43+ errors=`expr $errors + 1`;;
44+ esac
45+fi
46+
47+$xx -t @ -k4 -n mb2.I > mb2.O
48+code=$?
49+if test $code != 0; then
50+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
51+ errors=`expr $errors + 1`
52+else
53+ cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
54+ case $? in
55+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
56+ 1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
57+ (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
58+ errors=`expr $errors + 1`;;
59+ 2) $echo "Test mb2 may have failed." 1>&2
60+ $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
61+ errors=`expr $errors + 1`;;
62+ esac
63+fi
64+
65+if test $errors = 0; then
66+ $echo Passed all 2 tests. 1>&2
67+else
68+ $echo Failed $errors tests. 1>&2
69+fi
70+test $errors = 0 || errors=1
71+exit $errors
72--- /dev/null 2007-03-01 09:16:39.219409909 +0000
73+++ coreutils-6.8+/tests/sort/mb2.I 2007-03-01 15:08:24.000000000 +0000
74@@ -0,0 +1,4 @@
75+Apple@AA10@@20
76+Banana@AA5@@30
77+Citrus@AA20@@5
78+Cherry@AA30@@10
79--- /dev/null 2007-03-01 09:16:39.219409909 +0000
80+++ coreutils-6.8+/tests/sort/mb2.X 2007-03-01 15:08:24.000000000 +0000
81@@ -0,0 +1,4 @@
82+Citrus@AA20@@5
83+Cherry@AA30@@10
84+Apple@AA10@@20
85+Banana@AA5@@30
86--- /dev/null 2007-03-01 09:16:39.219409909 +0000
87+++ coreutils-6.8+/tests/sort/mb1.I 2007-03-01 15:08:24.000000000 +0000
88@@ -0,0 +1,4 @@
89+Apple@10
90+Banana@5
91+Citrus@20
92+Cherry@30
93--- /dev/null 2007-03-01 09:16:39.219409909 +0000
94+++ coreutils-6.8+/tests/sort/mb1.X 2007-03-01 15:08:24.000000000 +0000
95@@ -0,0 +1,4 @@
96+Banana@5
97+Apple@10
98+Citrus@20
99+Cherry@30
100--- coreutils-6.8+/tests/sort/Makefile.am.i18n 2007-01-24 07:47:37.000000000 +0000
101+++ coreutils-6.8+/tests/sort/Makefile.am 2007-03-01 15:09:59.000000000 +0000
102@@ -66,15 +66,17 @@
103 bigfield.O bigfield.E
104 ##test-files-end
105
106-EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
107-noinst_SCRIPTS = $x-tests
108+run_gen += mb1.O mb2.O
109+
110+EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
111+noinst_SCRIPTS = $x-tests # $x-mb-tests
112 TESTS_ENVIRONMENT = \
113 CU_TEST_NAME=`basename $(abs_srcdir)`,$$tst \
114 PATH="$(VG_PATH_PREFIX)`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
115
116 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
117
118-TESTS = $x-tests
119+TESTS = $x-tests $x-mb-tests
120
121 mk_script = $(srcdir)/../mk-script
122 $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am
123--- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
124+++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
125@@ -22,6 +22,11 @@
126
127 # include <stdio.h>
128
129+/* Get mbstate_t. */
130+# if HAVE_WCHAR_H
131+# include <wchar.h>
132+# endif
133+
134 /* A `struct linebuffer' holds a line of text. */
135
136 struct linebuffer
137@@ -29,6 +34,9 @@
138 size_t size; /* Allocated. */
139 size_t length; /* Used. */
140 char *buffer;
141+# if HAVE_WCHAR_H
142+ mbstate_t state;
143+# endif
144 };
145
146 /* Initialize linebuffer LINEBUFFER for use. */
147--- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
148+++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
149@@ -38,11 +38,28 @@
150 #include <stdio.h>
151 #include <getopt.h>
152 #include <sys/types.h>
153+
154+/* Get mbstate_t, mbrtowc(), wcwidth(). */
155+#if HAVE_WCHAR_H
156+# include <wchar.h>
157+#endif
158+
159 #include "system.h"
160 #include "error.h"
161 #include "quote.h"
162 #include "xstrndup.h"
163
164+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
165+ installation; work around this configuration error. */
166+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
167+# define MB_LEN_MAX 16
168+#endif
169+
170+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
171+#if HAVE_MBRTOWC && defined mbstate_t
172+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
173+#endif
174+
175 /* The official name of this program (e.g., no `g' prefix). */
176 #define PROGRAM_NAME "expand"
177
178@@ -183,6 +200,7 @@
179 stops = num_start + len - 1;
180 }
181 }
182+
183 else
184 {
185 error (0, 0, _("tab size contains invalid character(s): %s"),
186@@ -365,6 +383,142 @@
187 }
188 }
189
190+#if HAVE_MBRTOWC
191+static void
192+expand_multibyte (void)
193+{
194+ FILE *fp; /* Input stream. */
195+ mbstate_t i_state; /* Current shift state of the input stream. */
196+ mbstate_t i_state_bak; /* Back up the I_STATE. */
197+ mbstate_t o_state; /* Current shift state of the output stream. */
198+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
199+ char *bufpos = buf; /* Next read position of BUF; starts at BUF so the first refill's memmove gets a valid pointer. */
200+ size_t buflen = 0; /* The length of the byte sequence in buf. */
201+ wchar_t wc; /* A gotten wide character. */
202+ size_t mblength; /* The byte size of a multibyte character
203+ which shows as same character as WC. */
204+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */
205+ int column = 0; /* Column on screen of the next char. */
206+ int next_tab_column; /* Column the next tab stop is on. */
207+ int convert = 1; /* If nonzero, perform translations. */
208+
209+ fp = next_file ((FILE *) NULL);
210+ if (fp == NULL)
211+ return;
212+
213+ memset (&o_state, '\0', sizeof(mbstate_t));
214+ memset (&i_state, '\0', sizeof(mbstate_t));
215+
216+ for (;;)
217+ {
218+ /* Refill the buffer BUF. */
219+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
220+ {
221+ memmove (buf, bufpos, buflen);
222+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
223+ bufpos = buf;
224+ }
225+
226+ /* No character is left in BUF. */
227+ if (buflen < 1)
228+ {
229+ fp = next_file (fp);
230+
231+ if (fp == NULL)
232+ break; /* No more files. */
233+ else
234+ {
235+ memset (&i_state, '\0', sizeof(mbstate_t));
236+ continue;
237+ }
238+ }
239+
240+ /* Get a wide character. */
241+ i_state_bak = i_state;
242+ mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
243+
244+ switch (mblength)
245+ {
246+ case (size_t)-1: /* illegal byte sequence. */
247+ case (size_t)-2:
248+ mblength = 1;
249+ i_state = i_state_bak;
250+ if (convert)
251+ {
252+ ++column;
253+ if (convert_entire_line == 0)
254+ convert = 0;
255+ }
256+ putchar (*bufpos);
257+ break;
258+
259+ case 0: /* null. */
260+ mblength = 1;
261+ if (convert && convert_entire_line == 0)
262+ convert = 0;
263+ putchar ('\0');
264+ break;
265+
266+ default:
267+ if (wc == L'\n') /* LF. */
268+ {
269+ tab_index = 0;
270+ column = 0;
271+ convert = 1;
272+ putchar ('\n');
273+ }
274+ else if (wc == L'\t' && convert) /* Tab. */
275+ {
276+ if (tab_size == 0)
277+ {
278+ /* Do not let tab_index == first_free_tab;
279+ stop when it is 1 less. */
280+ while (tab_index < first_free_tab - 1
281+ && column >= tab_list[tab_index])
282+ tab_index++;
283+ next_tab_column = tab_list[tab_index];
284+ if (tab_index < first_free_tab - 1)
285+ tab_index++;
286+ if (column >= next_tab_column)
287+ next_tab_column = column + 1;
288+ }
289+ else
290+ next_tab_column = column + tab_size - column % tab_size;
291+
292+ while (column < next_tab_column)
293+ {
294+ putchar (' ');
295+ ++column;
296+ }
297+ }
298+ else /* Others. */
299+ {
300+ if (convert)
301+ {
302+ if (wc == L'\b')
303+ {
304+ if (column > 0)
305+ --column;
306+ }
307+ else
308+ {
309+ int width; /* The width of WC. */
310+
311+ width = wcwidth (wc);
312+ column += (width > 0) ? width : 0;
313+ if (convert_entire_line == 0)
314+ convert = 0;
315+ }
316+ }
317+ fwrite (bufpos, sizeof(char), mblength, stdout);
318+ }
319+ }
320+ buflen -= mblength;
321+ bufpos += mblength;
322+ }
323+}
324+#endif
325+
326 int
327 main (int argc, char **argv)
328 {
329@@ -429,7 +583,12 @@
330
331 file_list = (optind < argc ? &argv[optind] : stdin_argv);
332
333- expand ();
334+#if HAVE_MBRTOWC
335+ if (MB_CUR_MAX > 1)
336+ expand_multibyte ();
337+ else
338+#endif
339+ expand ();
340
341 if (have_read_stdin && fclose (stdin) != 0)
342 error (EXIT_FAILURE, errno, "-");
343--- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
344+++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
345@@ -23,16 +23,30 @@
346 #include <sys/types.h>
347 #include <getopt.h>
348
349+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
350+#if HAVE_WCHAR_H
351+# include <wchar.h>
352+#endif
353+
354+/* Get iswblank(), towupper(). */
355+#if HAVE_WCTYPE_H
356+# include <wctype.h>
357+#endif
358+
359 #include "system.h"
360 #include "error.h"
361 #include "hard-locale.h"
362 #include "linebuffer.h"
363-#include "memcasecmp.h"
364 #include "quote.h"
365 #include "stdio--.h"
366 #include "xmemcoll.h"
367 #include "xstrtol.h"
368
369+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
370+#if HAVE_MBRTOWC && defined mbstate_t
371+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
372+#endif
373+
374 /* The official name of this program (e.g., no `g' prefix). */
375 #define PROGRAM_NAME "join"
376
377@@ -104,10 +118,12 @@
378 /* Last element in `outlist', where a new element can be added. */
379 static struct outlist *outlist_end = &outlist_head;
380
381-/* Tab character separating fields. If negative, fields are separated
382- by any nonempty string of blanks, otherwise by exactly one
383- tab character whose value (when cast to unsigned char) equals TAB. */
384-static int tab = -1;
385+/* Tab character separating fields. If NULL, fields are separated
386+ by any nonempty string of blanks. */
387+static char *tab = NULL;
388+
389+/* The number of bytes used for tab. */
390+static size_t tablen = 0;
391
392 static struct option const longopts[] =
393 {
394@@ -190,6 +206,8 @@
395
396 /* Fill in the `fields' structure in LINE. */
397
398+/* Fill in the `fields' structure in LINE. */
399+
400 static void
401 xfields (struct line *line)
402 {
403@@ -199,10 +217,11 @@
404 if (ptr == lim)
405 return;
406
407- if (0 <= tab)
408+ if (tab != NULL)
409 {
410+ unsigned char t = tab[0];
411 char *sep;
412- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
413+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
414 extract_field (line, ptr, sep - ptr);
415 }
416 else
417@@ -229,6 +248,148 @@
418 extract_field (line, ptr, lim - ptr);
419 }
420
421+#if HAVE_MBRTOWC
422+static void
423+xfields_multibyte (struct line *line)
424+{
425+ char *ptr = line->buf.buffer;
426+ char const *lim = ptr + line->buf.length - 1;
427+ wchar_t wc = 0;
428+ size_t mblength = 1;
429+ mbstate_t state, state_bak;
430+
431+ memset (&state, 0, sizeof (mbstate_t));
432+
433+ if (ptr == lim)
434+ return;
435+
436+ if (tab != NULL)
437+ {
438+ unsigned char t = tab[0];
439+ char *sep = ptr;
440+ for (; ptr < lim; ptr = sep + mblength)
441+ {
442+ sep = ptr;
443+ while (sep < lim)
444+ {
445+ state_bak = state;
446+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
447+
448+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
449+ {
450+ mblength = 1;
451+ state = state_bak;
452+ }
453+ mblength = (mblength < 1) ? 1 : mblength;
454+
455+ if (mblength == tablen && !memcmp (sep, tab, mblength))
456+ break;
457+ else
458+ {
459+ sep += mblength;
460+ continue;
461+ }
462+ }
463+
464+ if (sep == lim)
465+ break;
466+
467+ extract_field (line, ptr, sep - ptr);
468+ }
469+ }
470+ else
471+ {
472+ /* Skip leading blanks before the first field. */
473+ while(ptr < lim)
474+ {
475+ state_bak = state;
476+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
477+
478+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
479+ {
480+ mblength = 1;
481+ state = state_bak;
482+ break;
483+ }
484+ mblength = (mblength < 1) ? 1 : mblength;
485+
486+ if (!iswblank(wc))
487+ break;
488+ ptr += mblength;
489+ }
490+
491+ do
492+ {
493+ char *sep;
494+ state_bak = state;
495+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
496+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
497+ {
498+ mblength = 1;
499+ state = state_bak;
500+ break;
501+ }
502+ mblength = (mblength < 1) ? 1 : mblength;
503+
504+ sep = ptr + mblength;
505+ while (sep != lim)
506+ {
507+ state_bak = state;
508+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
509+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
510+ {
511+ mblength = 1;
512+ state = state_bak;
513+ break;
514+ }
515+ mblength = (mblength < 1) ? 1 : mblength;
516+
517+ if (iswblank (wc))
518+ break;
519+
520+ sep += mblength;
521+ }
522+
523+ extract_field (line, ptr, sep - ptr);
524+ if (sep == lim)
525+ return;
526+
527+ state_bak = state;
528+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
529+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
530+ {
531+ mblength = 1;
532+ state = state_bak;
533+ break;
534+ }
535+ mblength = (mblength < 1) ? 1 : mblength;
536+
537+ ptr = sep + mblength;
538+ while (ptr != lim)
539+ {
540+ state_bak = state;
541+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
542+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
543+ {
544+ mblength = 1;
545+ state = state_bak;
546+ break;
547+ }
548+ mblength = (mblength < 1) ? 1 : mblength;
549+
550+ if (!iswblank (wc))
551+ break;
552+
553+ ptr += mblength;
554+ }
555+ }
556+ while (ptr != lim);
557+ }
558+
559+ extract_field (line, ptr, lim - ptr);
560+}
561+#endif
562+
563 /* Read a line from FP into LINE and split it into fields.
564 Return true if successful. */
565
566@@ -249,6 +410,11 @@
567 line->nfields_allocated = 0;
568 line->nfields = 0;
569 line->fields = NULL;
570+#if HAVE_MBRTOWC
571+ if (MB_CUR_MAX > 1)
572+ xfields_multibyte (line);
573+ else
574+#endif
575 xfields (line);
576 return true;
577 }
578@@ -303,56 +469,114 @@
579 keycmp (struct line const *line1, struct line const *line2)
580 {
581 /* Start of field to compare in each file. */
582- char *beg1;
583- char *beg2;
584-
585- size_t len1;
586- size_t len2; /* Length of fields to compare. */
587+ char *beg[2];
588+ char *copy[2];
589+ size_t len[2]; /* Length of fields to compare. */
590 int diff;
591+ int i, j;
592
593 if (join_field_1 < line1->nfields)
594 {
595- beg1 = line1->fields[join_field_1].beg;
596- len1 = line1->fields[join_field_1].len;
597+ beg[0] = line1->fields[join_field_1].beg;
598+ len[0] = line1->fields[join_field_1].len;
599 }
600 else
601 {
602- beg1 = NULL;
603- len1 = 0;
604+ beg[0] = NULL;
605+ len[0] = 0;
606 }
607
608 if (join_field_2 < line2->nfields)
609 {
610- beg2 = line2->fields[join_field_2].beg;
611- len2 = line2->fields[join_field_2].len;
612+ beg[1] = line2->fields[join_field_2].beg;
613+ len[1] = line2->fields[join_field_2].len;
614 }
615 else
616 {
617- beg2 = NULL;
618- len2 = 0;
619+ beg[1] = NULL;
620+ len[1] = 0;
621 }
622
623- if (len1 == 0)
624- return len2 == 0 ? 0 : -1;
625- if (len2 == 0)
626+ if (len[0] == 0)
627+ return len[1] == 0 ? 0 : -1;
628+ if (len[1] == 0)
629 return 1;
630
631 if (ignore_case)
632 {
633- /* FIXME: ignore_case does not work with NLS (in particular,
634- with multibyte chars). */
635- diff = memcasecmp (beg1, beg2, MIN (len1, len2));
636+#ifdef HAVE_MBRTOWC
637+ if (MB_CUR_MAX > 1)
638+ {
639+ size_t mblength;
640+ wchar_t wc, uwc;
641+ mbstate_t state, state_bak;
642+
643+ memset (&state, '\0', sizeof (mbstate_t));
644+
645+ for (i = 0; i < 2; i++)
646+ {
647+ copy[i] = alloca (len[i] + 1);
648+
649+ for (j = 0; j < MIN (len[0], len[1]);)
650+ {
651+ state_bak = state;
652+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
653+
654+ switch (mblength)
655+ {
656+ case (size_t) -1:
657+ case (size_t) -2:
658+ state = state_bak;
659+ /* Fall through */
660+ case 0:
661+ mblength = 1;
662+ break;
663+
664+ default:
665+ uwc = towupper (wc);
666+
667+ if (uwc != wc)
668+ {
669+ mbstate_t state_wc;
670+
671+ memset (&state_wc, '\0', sizeof (mbstate_t));
672+ wcrtomb (copy[i] + j, uwc, &state_wc);
673+ }
674+ else
675+ memcpy (copy[i] + j, beg[i] + j, mblength);
676+ }
677+ j += mblength;
678+ }
679+ copy[i][j] = '\0';
680+ }
681+ }
682+ else
683+#endif
684+ {
685+ for (i = 0; i < 2; i++)
686+ {
687+ copy[i] = alloca (len[i] + 1);
688+
689+ for (j = 0; j < MIN (len[0], len[1]); j++)
690+ copy[i][j] = toupper (beg[i][j]);
691+
692+ copy[i][j] = '\0';
693+ }
694+ }
695 }
696 else
697 {
698- if (hard_LC_COLLATE)
699- return xmemcoll (beg1, len1, beg2, len2);
700- diff = memcmp (beg1, beg2, MIN (len1, len2));
701+ copy[0] = (unsigned char *) beg[0];
702+ copy[1] = (unsigned char *) beg[1];
703 }
704
705+ if (hard_LC_COLLATE)
706+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
707+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
708+
709 if (diff)
710 return diff;
711- return len1 < len2 ? -1 : len1 != len2;
712+ return len[0] - len[1];
713 }
714
715 /* Print field N of LINE if it exists and is nonempty, otherwise
716@@ -377,11 +601,18 @@
717
718 /* Print the join of LINE1 and LINE2. */
719
720+#define PUT_TAB_CHAR \
721+ do \
722+ { \
723+ (tab != NULL) ? \
724+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
725+ } \
726+ while (0)
727+
728 static void
729 prjoin (struct line const *line1, struct line const *line2)
730 {
731 const struct outlist *outlist;
732- char output_separator = tab < 0 ? ' ' : tab;
733
734 outlist = outlist_head.next;
735 if (outlist)
736@@ -397,12 +628,12 @@
737 if (o->file == 0)
738 {
739 if (line1 == &uni_blank)
740- {
741+ {
742 line = line2;
743 field = join_field_2;
744 }
745 else
746- {
747+ {
748 line = line1;
749 field = join_field_1;
750 }
751@@ -416,7 +647,7 @@
752 o = o->next;
753 if (o == NULL)
754 break;
755- putchar (output_separator);
756+ PUT_TAB_CHAR;
757 }
758 putchar ('\n');
759 }
760@@ -434,23 +665,23 @@
761 prfield (join_field_1, line1);
762 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
763 {
764- putchar (output_separator);
765+ PUT_TAB_CHAR;
766 prfield (i, line1);
767 }
768 for (i = join_field_1 + 1; i < line1->nfields; ++i)
769 {
770- putchar (output_separator);
771+ PUT_TAB_CHAR;
772 prfield (i, line1);
773 }
774
775 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
776 {
777- putchar (output_separator);
778+ PUT_TAB_CHAR;
779 prfield (i, line2);
780 }
781 for (i = join_field_2 + 1; i < line2->nfields; ++i)
782 {
783- putchar (output_separator);
784+ PUT_TAB_CHAR;
785 prfield (i, line2);
786 }
787 putchar ('\n');
788@@ -859,20 +1090,41 @@
789
790 case 't':
791 {
792- unsigned char newtab = optarg[0];
793- if (! newtab)
794+ char *newtab;
795+ size_t newtablen;
796+ if (! optarg[0])
797 error (EXIT_FAILURE, 0, _("empty tab"));
798- if (optarg[1])
799+ newtab = xstrdup (optarg);
800+#if HAVE_MBRTOWC
801+ if (MB_CUR_MAX > 1)
802+ {
803+ mbstate_t state;
804+
805+ memset (&state, 0, sizeof (mbstate_t));
806+ newtablen = mbrtowc (NULL, newtab,
807+ strnlen (newtab, MB_LEN_MAX),
808+ &state);
809+ if (newtablen == (size_t) 0
810+ || newtablen == (size_t) -1
811+ || newtablen == (size_t) -2)
812+ newtablen = 1;
813+ }
814+ else
815+#endif
816+ newtablen = 1;
817+
818+ if (newtablen == 1 && newtab[1])
819+ {
820+ if (STREQ (newtab, "\\0"))
821+ newtab[0] = '\0';
822+ }
823+ if (tab != NULL && strcmp (tab, newtab))
824 {
825- if (STREQ (optarg, "\\0"))
826- newtab = '\0';
827- else
828- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
829- quote (optarg));
830+ free (newtab);
831+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
832 }
833- if (0 <= tab && tab != newtab)
834- error (EXIT_FAILURE, 0, _("incompatible tabs"));
835 tab = newtab;
836+ tablen = newtablen;
837 }
838 break;
839
840--- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
841+++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
842@@ -23,6 +23,16 @@
843 #include <getopt.h>
844 #include <sys/types.h>
845
846+/* Get mbstate_t, mbrtowc(). */
847+#if HAVE_WCHAR_H
848+# include <wchar.h>
849+#endif
850+
851+/* Get isw* functions. */
852+#if HAVE_WCTYPE_H
853+# include <wctype.h>
854+#endif
855+
856 #include "system.h"
857 #include "argmatch.h"
858 #include "linebuffer.h"
859@@ -32,7 +42,19 @@
860 #include "quote.h"
861 #include "xmemcoll.h"
862 #include "xstrtol.h"
863-#include "memcasecmp.h"
864+#include "xmemcoll.h"
865+
866+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
867+ installation; work around this configuration error. */
868+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
869+# define MB_LEN_MAX 16
870+#endif
871+
872+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
873+#if HAVE_MBRTOWC && defined mbstate_t
874+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
875+#endif
876+
877
878 /* The official name of this program (e.g., no `g' prefix). */
879 #define PROGRAM_NAME "uniq"
880@@ -109,6 +131,10 @@
881 /* Select whether/how to delimit groups of duplicate lines. */
882 static enum delimit_method delimit_groups;
883
884+/* Function pointers. */
885+static char *
886+(*find_field) (struct linebuffer *line);
887+
888 static struct option const longopts[] =
889 {
890 {"count", no_argument, NULL, 'c'},
891@@ -198,7 +224,7 @@
892 return a pointer to the beginning of the line's field to be compared. */
893
894 static char *
895-find_field (const struct linebuffer *line)
896+find_field_uni (struct linebuffer *line)
897 {
898 size_t count;
899 char *lp = line->buffer;
900@@ -219,6 +245,83 @@
901 return lp + i;
902 }
903
904+#if HAVE_MBRTOWC
905+
906+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
907+ do \
908+ { \
909+ mbstate_t state_bak; \
910+ \
911+ CONVFAIL = 0; \
912+ state_bak = *STATEP; \
913+ \
914+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
915+ \
916+ switch (MBLENGTH) \
917+ { \
918+ case (size_t)-2: \
919+ case (size_t)-1: \
920+ *STATEP = state_bak; \
921+ CONVFAIL++; \
922+ /* Fall through */ \
923+ case 0: \
924+ MBLENGTH = 1; \
925+ } \
926+ } \
927+ while (0)
928+
929+static char *
930+find_field_multi (struct linebuffer *line)
931+{
932+ size_t count;
933+ char *lp = line->buffer;
934+ size_t size = line->length - 1;
935+ size_t pos;
936+ size_t mblength;
937+ wchar_t wc;
938+ mbstate_t *statep;
939+ int convfail;
940+
941+ pos = 0;
942+ statep = &(line->state);
943+
944+ /* skip fields. */
945+ for (count = 0; count < skip_fields && pos < size; count++)
946+ {
947+ while (pos < size)
948+ {
949+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
950+
951+ if (convfail || !iswblank (wc))
952+ {
953+ pos += mblength;
954+ break;
955+ }
956+ pos += mblength;
957+ }
958+
959+ while (pos < size)
960+ {
961+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
962+
963+ if (!convfail && iswblank (wc))
964+ break;
965+
966+ pos += mblength;
967+ }
968+ }
969+
970+ /* skip chars. */
971+ for (count = 0; count < skip_chars && pos < size; count++)
972+ {
973+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
974+ pos += mblength;
975+ }
976+
977+ return lp + pos;
978+}
979+#endif
980+
981 /* Return false if two strings OLD and NEW match, true if not.
982 OLD and NEW point not to the beginnings of the lines
983 but rather to the beginnings of the fields to compare.
984@@ -227,6 +330,8 @@
985 static bool
986 different (char *old, char *new, size_t oldlen, size_t newlen)
987 {
988+ char *copy_old, *copy_new;
989+
990 if (check_chars < oldlen)
991 oldlen = check_chars;
992 if (check_chars < newlen)
993@@ -234,14 +339,92 @@
994
995 if (ignore_case)
996 {
997- /* FIXME: This should invoke strcoll somehow. */
998- return oldlen != newlen || memcasecmp (old, new, oldlen);
999+ size_t i;
1000+
1001+ copy_old = alloca (oldlen + 1);
1002+ copy_new = alloca (newlen + 1);
1003+ /* Size and fill each copy by its own length; xmemcoll gets oldlen and newlen separately. */
1004+ for (i = 0; i < oldlen || i < newlen; i++)
1005+ {
1006+ if (i < oldlen) copy_old[i] = toupper (old[i]);
1007+ if (i < newlen) copy_new[i] = toupper (new[i]);
1008+ }
1009 }
1010- else if (hard_LC_COLLATE)
1011- return xmemcoll (old, oldlen, new, newlen) != 0;
1012 else
1013- return oldlen != newlen || memcmp (old, new, oldlen);
1014+ {
1015+ copy_old = (char *)old;
1016+ copy_new = (char *)new;
1017+ }
1018+
1019+ return xmemcoll (copy_old, oldlen, copy_new, newlen);
1020+}
1021+
1022+#if HAVE_MBRTOWC
1023+static int
1024+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1025+{
1026+ size_t i, j, chars;
1027+ const char *str[2];
1028+ char *copy[2];
1029+ size_t len[2];
1030+ mbstate_t state[2];
1031+ size_t mblength;
1032+ wchar_t wc, uwc;
1033+ mbstate_t state_bak;
1034+
1035+ str[0] = old;
1036+ str[1] = new;
1037+ len[0] = oldlen;
1038+ len[1] = newlen;
1039+ state[0] = oldstate;
1040+ state[1] = newstate;
1041+
1042+ for (i = 0; i < 2; i++)
1043+ {
1044+ copy[i] = alloca (len[i] + 1);
1045+
1046+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1047+ {
1048+ state_bak = state[i];
1049+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1050+
1051+ switch (mblength)
1052+ {
1053+ case (size_t)-1:
1054+ case (size_t)-2:
1055+ state[i] = state_bak;
1056+ /* Fall through */
1057+ case 0:
1058+ mblength = 1;
1059+ break;
1060+
1061+ default:
1062+ if (ignore_case)
1063+ {
1064+ uwc = towupper (wc);
1065+
1066+ if (uwc != wc)
1067+ {
1068+ mbstate_t state_wc;
1069+
1070+ memset (&state_wc, '\0', sizeof(mbstate_t));
1071+ wcrtomb (copy[i] + j, uwc, &state_wc);
1072+ }
1073+ else
1074+ memcpy (copy[i] + j, str[i] + j, mblength);
1075+ }
1076+ else
1077+ memcpy (copy[i] + j, str[i] + j, mblength);
1078+ }
1079+ j += mblength;
1080+ }
1081+ copy[i][j] = '\0';
1082+ len[i] = j;
1083+ }
1084+
1085+ return xmemcoll (copy[0], len[0], copy[1], len[1]);
1086 }
1087+#endif
1088
1089 /* Output the line in linebuffer LINE to standard output
1090 provided that the switches say it should be output.
1091@@ -295,15 +478,43 @@
1092 {
1093 char *prevfield IF_LINT (= NULL);
1094 size_t prevlen IF_LINT (= 0);
1095+#if HAVE_MBRTOWC
1096+ mbstate_t prevstate;
1097+
1098+ memset (&prevstate, '\0', sizeof (mbstate_t));
1099+#endif
1100
1101 while (!feof (stdin))
1102 {
1103 char *thisfield;
1104 size_t thislen;
1105+#if HAVE_MBRTOWC
1106+ mbstate_t thisstate;
1107+#endif
1108+
1109 if (readlinebuffer (thisline, stdin) == 0)
1110 break;
1111 thisfield = find_field (thisline);
1112 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1113+#if HAVE_MBRTOWC
1114+ if (MB_CUR_MAX > 1)
1115+ {
1116+ thisstate = thisline->state;
1117+
1118+ if (prevline->length == 0 || different_multi
1119+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1120+ {
1121+ fwrite (thisline->buffer, sizeof (char),
1122+ thisline->length, stdout);
1123+
1124+ SWAP_LINES (prevline, thisline);
1125+ prevfield = thisfield;
1126+ prevlen = thislen;
1127+ prevstate = thisstate;
1128+ }
1129+ }
1130+ else
1131+#endif
1132 if (prevline->length == 0
1133 || different (thisfield, prevfield, thislen, prevlen))
1134 {
1135@@ -322,17 +533,26 @@
1136 size_t prevlen;
1137 uintmax_t match_count = 0;
1138 bool first_delimiter = true;
1139+#if HAVE_MBRTOWC
1140+ mbstate_t prevstate;
1141+#endif
1142
1143 if (readlinebuffer (prevline, stdin) == 0)
1144 goto closefiles;
1145 prevfield = find_field (prevline);
1146 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1147+#if HAVE_MBRTOWC
1148+ prevstate = prevline->state;
1149+#endif
1150
1151 while (!feof (stdin))
1152 {
1153 bool match;
1154 char *thisfield;
1155 size_t thislen;
1156+#if HAVE_MBRTOWC
1157+ mbstate_t thisstate;
1158+#endif
1159 if (readlinebuffer (thisline, stdin) == 0)
1160 {
1161 if (ferror (stdin))
1162@@ -341,6 +561,15 @@
1163 }
1164 thisfield = find_field (thisline);
1165 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1166+#if HAVE_MBRTOWC
1167+ if (MB_CUR_MAX > 1)
1168+ {
1169+ thisstate = thisline->state;
1170+ match = !different_multi (thisfield, prevfield,
1171+ thislen, prevlen, thisstate, prevstate);
1172+ }
1173+ else
1174+#endif
1175 match = !different (thisfield, prevfield, thislen, prevlen);
1176 match_count += match;
1177
1178@@ -373,6 +602,9 @@
1179 SWAP_LINES (prevline, thisline);
1180 prevfield = thisfield;
1181 prevlen = thislen;
1182+#if HAVE_MBRTOWC
1183+ prevstate = thisstate;
1184+#endif
1185 if (!match)
1186 match_count = 0;
1187 }
1188@@ -417,6 +649,19 @@
1189
1190 atexit (close_stdout);
1191
1192+#if HAVE_MBRTOWC
1193+ if (MB_CUR_MAX > 1)
1194+ {
1195+ find_field = find_field_multi;
1196+ }
1197+ else
1198+#endif
1199+ {
1200+ find_field = find_field_uni;
1201+ }
1202+
1203+
1204+
1205 skip_chars = 0;
1206 skip_fields = 0;
1207 check_chars = SIZE_MAX;
1208--- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1209+++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1210@@ -23,11 +23,33 @@
1211 #include <getopt.h>
1212 #include <sys/types.h>
1213
1214+/* Get mbstate_t, mbrtowc(), wcwidth(). */
1215+#if HAVE_WCHAR_H
1216+# include <wchar.h>
1217+#endif
1218+
1219+/* Get iswprint(), iswblank(). */
1220+#if HAVE_WCTYPE_H
1221+# include <wctype.h>
1222+#endif
1223+
1224 #include "system.h"
1225 #include "error.h"
1226 #include "quote.h"
1227 #include "xstrtol.h"
1228
1229+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1230+ installation; work around this configuration error. */
1231+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1232+# undef MB_LEN_MAX
1233+# define MB_LEN_MAX 16
1234+#endif
1235+
1236+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1237+#if HAVE_MBRTOWC && defined mbstate_t
1238+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1239+#endif
1240+
1241 #define TAB_WIDTH 8
1242
1243 /* The official name of this program (e.g., no `g' prefix). */
1244@@ -35,23 +57,44 @@
1245
1246 #define AUTHORS "David MacKenzie"
1247
1248+#define FATAL_ERROR(Message) \
1249+ do \
1250+ { \
1251+ error (0, 0, (Message)); \
1252+ usage (2); \
1253+ } \
1254+ while (0)
1255+
1256+enum operating_mode
1257+{
1258+ /* Fold texts by columns that are at the given positions. */
1259+ column_mode,
1260+
1261+ /* Fold texts by bytes that are at the given positions. */
1262+ byte_mode,
1263+
1264+ /* Fold texts by characters that are at the given positions. */
1265+ character_mode,
1266+};
1267+
1268 /* The name this program was run with. */
1269 char *program_name;
1270
1271+/* The folding mode currently in effect. (Default: column_mode) */
1272+static enum operating_mode operating_mode;
1273+
1274 /* If nonzero, try to break on whitespace. */
1275 static bool break_spaces;
1276
1277-/* If nonzero, count bytes, not column positions. */
1278-static bool count_bytes;
1279-
1280 /* If nonzero, at least one of the files we read was standard input. */
1281 static bool have_read_stdin;
1282
1283-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1284+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1285
1286 static struct option const longopts[] =
1287 {
1288 {"bytes", no_argument, NULL, 'b'},
1289+ {"characters", no_argument, NULL, 'c'},
1290 {"spaces", no_argument, NULL, 's'},
1291 {"width", required_argument, NULL, 'w'},
1292 {GETOPT_HELP_OPTION_DECL},
1293@@ -81,6 +124,7 @@
1294 "), stdout);
1295 fputs (_("\
1296 -b, --bytes count bytes rather than columns\n\
1297+ -c, --characters count characters rather than columns\n\
1298 -s, --spaces break at spaces\n\
1299 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1300 "), stdout);
1301@@ -98,7 +142,7 @@
1302 static size_t
1303 adjust_column (size_t column, char c)
1304 {
1305- if (!count_bytes)
1306+ if (operating_mode != byte_mode)
1307 {
1308 if (c == '\b')
1309 {
1310@@ -121,30 +165,14 @@
1311 to stdout, with maximum line length WIDTH.
1312 Return true if successful. */
1313
1314-static bool
1315-fold_file (char const *filename, size_t width)
1316+static void
1317+fold_text (FILE *istream, size_t width, int *saved_errno)
1318 {
1319- FILE *istream;
1320 int c;
1321 size_t column = 0; /* Screen column where next char will go. */
1322 size_t offset_out = 0; /* Index in `line_out' for next char. */
1323 static char *line_out = NULL;
1324 static size_t allocated_out = 0;
1325- int saved_errno;
1326-
1327- if (STREQ (filename, "-"))
1328- {
1329- istream = stdin;
1330- have_read_stdin = true;
1331- }
1332- else
1333- istream = fopen (filename, "r");
1334-
1335- if (istream == NULL)
1336- {
1337- error (0, errno, "%s", filename);
1338- return false;
1339- }
1340
1341 while ((c = getc (istream)) != EOF)
1342 {
1343@@ -172,6 +200,15 @@
1344 bool found_blank = false;
1345 size_t logical_end = offset_out;
1346
1347+ /* If LINE_OUT is still empty, store the
1348+ character in LINE_OUT anyway, even
1349+ if column is bigger than width. */
1350+ if (offset_out == 0)
1351+ {
1352+ line_out[offset_out++] = c;
1353+ continue;
1354+ }
1355+
1356 /* Look for the last blank. */
1357 while (logical_end)
1358 {
1359@@ -218,11 +255,225 @@
1360 line_out[offset_out++] = c;
1361 }
1362
1363- saved_errno = errno;
1364+ *saved_errno = errno;
1365+
1366+ if (offset_out)
1367+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1368+
1369+ free(line_out);
1370+}
1371+
1372+#if HAVE_MBRTOWC
1373+static void
1374+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1375+{
1376+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1377+ size_t buflen = 0; /* The length of the byte sequence in buf. */
1378+ char *bufpos; /* Next read position of BUF. */
1379+ wint_t wc; /* The wide character just converted. */
1380+ size_t mblength; /* The byte length of the multibyte character that
1381+ was converted to WC. */
1382+ mbstate_t state, state_bak; /* State of the stream. */
1383+ int convfail; /* 1 when the conversion failed, otherwise 0. */
1384+
1385+ char *line_out = NULL;
1386+ size_t offset_out = 0; /* Index in `line_out' for next char. */
1387+ size_t allocated_out = 0;
1388+
1389+ int increment;
1390+ size_t column = 0;
1391+
1392+ size_t last_blank_pos;
1393+ size_t last_blank_column;
1394+ int is_blank_seen;
1395+ int last_blank_increment;
1396+ int is_bs_following_last_blank;
1397+ size_t bs_following_last_blank_num;
1398+ int is_cr_after_last_blank;
1399+
1400+#define CLEAR_FLAGS \
1401+ do \
1402+ { \
1403+ last_blank_pos = 0; \
1404+ last_blank_column = 0; \
1405+ is_blank_seen = 0; \
1406+ is_bs_following_last_blank = 0; \
1407+ bs_following_last_blank_num = 0; \
1408+ is_cr_after_last_blank = 0; \
1409+ } \
1410+ while (0)
1411+
1412+#define START_NEW_LINE \
1413+ do \
1414+ { \
1415+ putchar ('\n'); \
1416+ column = 0; \
1417+ offset_out = 0; \
1418+ CLEAR_FLAGS; \
1419+ } \
1420+ while (0)
1421+
1422+ CLEAR_FLAGS;
1423+ memset (&state, '\0', sizeof(mbstate_t));
1424+
1425+ for (;; bufpos += mblength, buflen -= mblength)
1426+ {
1427+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1428+ {
1429+ memmove (buf, bufpos, buflen);
1430+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1431+ bufpos = buf;
1432+ }
1433+
1434+ if (buflen < 1)
1435+ break;
1436+
1437+ /* Get a wide character. */
1438+ convfail = 0;
1439+ state_bak = state;
1440+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1441+
1442+ switch (mblength)
1443+ {
1444+ case (size_t)-1:
1445+ case (size_t)-2:
1446+ convfail++;
1447+ state = state_bak;
1448+ /* Fall through. */
1449+
1450+ case 0:
1451+ mblength = 1;
1452+ break;
1453+ }
1454+
1455+rescan:
1456+ if (operating_mode == byte_mode) /* byte mode */
1457+ increment = mblength;
1458+ else if (operating_mode == character_mode) /* character mode */
1459+ increment = 1;
1460+ else /* column mode */
1461+ {
1462+ if (convfail)
1463+ increment = 1;
1464+ else
1465+ {
1466+ switch (wc)
1467+ {
1468+ case L'\n':
1469+ fwrite (line_out, sizeof(char), offset_out, stdout);
1470+ START_NEW_LINE;
1471+ continue;
1472+
1473+ case L'\b':
1474+ increment = (column > 0) ? -1 : 0;
1475+ break;
1476+
1477+ case L'\r':
1478+ increment = -1 * column;
1479+ break;
1480+
1481+ case L'\t':
1482+ increment = 8 - column % 8;
1483+ break;
1484+
1485+ default:
1486+ increment = wcwidth (wc);
1487+ increment = (increment < 0) ? 0 : increment;
1488+ }
1489+ }
1490+ }
1491+
1492+ if (column + increment > width && break_spaces && last_blank_pos)
1493+ {
1494+ fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1495+ putchar ('\n');
1496+
1497+ offset_out = offset_out - last_blank_pos;
1498+ column = column - last_blank_column + ((is_cr_after_last_blank)
1499+ ? last_blank_increment : bs_following_last_blank_num);
1500+ memmove (line_out, line_out + last_blank_pos, offset_out);
1501+ CLEAR_FLAGS;
1502+ goto rescan;
1503+ }
1504+
1505+ if (column + increment > width && column != 0)
1506+ {
1507+ fwrite (line_out, sizeof(char), offset_out, stdout);
1508+ START_NEW_LINE;
1509+ goto rescan;
1510+ }
1511+
1512+ if (allocated_out < offset_out + mblength)
1513+ {
1514+ allocated_out += 1024;
1515+ line_out = xrealloc (line_out, allocated_out);
1516+ }
1517+
1518+ memcpy (line_out + offset_out, bufpos, mblength);
1519+ offset_out += mblength;
1520+ column += increment;
1521+
1522+ if (is_blank_seen && !convfail && wc == L'\r')
1523+ is_cr_after_last_blank = 1;
1524+
1525+ if (is_bs_following_last_blank && !convfail && wc == L'\b')
1526+ ++bs_following_last_blank_num;
1527+ else
1528+ is_bs_following_last_blank = 0;
1529+
1530+ if (break_spaces && !convfail && iswblank (wc))
1531+ {
1532+ last_blank_pos = offset_out;
1533+ last_blank_column = column;
1534+ is_blank_seen = 1;
1535+ last_blank_increment = increment;
1536+ is_bs_following_last_blank = 1;
1537+ bs_following_last_blank_num = 0;
1538+ is_cr_after_last_blank = 0;
1539+ }
1540+ }
1541+
1542+ *saved_errno = errno;
1543
1544 if (offset_out)
1545 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1546
1547+ free(line_out);
1548+}
1549+#endif
1550+
1551+/* Fold file FILENAME, or standard input if FILENAME is "-",
1552+ to stdout, with maximum line length WIDTH.
1553+ Return true if successful. NOTE(review): the fopen failure path below returns 1, i.e. true -- confirm. */
1554+
1555+static bool
1556+fold_file (char *filename, size_t width)
1557+{
1558+ FILE *istream;
1559+ int saved_errno;
1560+
1561+ if (STREQ (filename, "-"))
1562+ {
1563+ istream = stdin;
1564+ have_read_stdin = 1;
1565+ }
1566+ else
1567+ istream = fopen (filename, "r");
1568+
1569+ if (istream == NULL)
1570+ {
1571+ error (0, errno, "%s", filename);
1572+ return 1;
1573+ }
1574+
1575+ /* Define how ISTREAM is being folded. */
1576+#if HAVE_MBRTOWC
1577+ if (MB_CUR_MAX > 1)
1578+ fold_multibyte_text (istream, width, &saved_errno);
1579+ else
1580+#endif
1581+ fold_text (istream, width, &saved_errno);
1582+
1583 if (ferror (istream))
1584 {
1585 error (0, saved_errno, "%s", filename);
1586@@ -255,7 +506,8 @@
1587
1588 atexit (close_stdout);
1589
1590- break_spaces = count_bytes = have_read_stdin = false;
1591+ operating_mode = column_mode;
1592+ break_spaces = have_read_stdin = false;
1593
1594 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1595 {
1596@@ -264,7 +516,15 @@
1597 switch (optc)
1598 {
1599 case 'b': /* Count bytes rather than columns. */
1600- count_bytes = true;
1601+ if (operating_mode != column_mode)
1602+ FATAL_ERROR (_("only one way of folding may be specified"));
1603+ operating_mode = byte_mode;
1604+ break;
1605+
1606+ case 'c':
1607+ if (operating_mode != column_mode)
1608+ FATAL_ERROR (_("only one way of folding may be specified"));
1609+ operating_mode = character_mode;
1610 break;
1611
1612 case 's': /* Break at word boundaries. */
1613--- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1614+++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1615@@ -23,10 +23,19 @@
1616
1617 #include <config.h>
1618
1619+#include <assert.h>
1620 #include <getopt.h>
1621 #include <sys/types.h>
1622 #include <sys/wait.h>
1623 #include <signal.h>
1624+#if HAVE_WCHAR_H
1625+# include <wchar.h>
1626+#endif
1627+/* Get isw* functions. */
1628+#if HAVE_WCTYPE_H
1629+# include <wctype.h>
1630+#endif
1631+
1632 #include "system.h"
1633 #include "argmatch.h"
1634 #include "error.h"
1635@@ -116,14 +125,38 @@
1636 /* Thousands separator; if -1, then there isn't one. */
1637 static int thousands_sep;
1638
1639+static int force_general_numcompare = 0;
1640+
1641 /* Nonzero if the corresponding locales are hard. */
1642 static bool hard_LC_COLLATE;
1643-#if HAVE_NL_LANGINFO
1644+#if HAVE_LANGINFO_CODESET
1645 static bool hard_LC_TIME;
1646 #endif
1647
1648 #define NONZERO(x) ((x) != 0)
1649
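1650+/* Get a multibyte character's byte length. NOTE(review): the body assigns to mblength, not MBLENGTH, so it only works when the argument is named mblength. */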
1651+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
1652+ do \
1653+ { \
1654+ wchar_t wc; \
1655+ mbstate_t state_bak; \
1656+ \
1657+ state_bak = STATE; \
1658+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \
1659+ \
1660+ switch (MBLENGTH) \
1661+ { \
1662+ case (size_t)-1: \
1663+ case (size_t)-2: \
1664+ STATE = state_bak; \
1665+ /* Fall through. */ \
1666+ case 0: \
1667+ MBLENGTH = 1; \
1668+ } \
1669+ } \
1670+ while (0)
1671+
1672 /* The kind of blanks for '-b' to skip in various options. */
1673 enum blanktype { bl_start, bl_end, bl_both };
1674
1675@@ -261,13 +294,11 @@
1676 they were read if all keys compare equal. */
1677 static bool stable;
1678
1679-/* If TAB has this value, blanks separate fields. */
1680-enum { TAB_DEFAULT = CHAR_MAX + 1 };
1681-
1682-/* Tab character separating fields. If TAB_DEFAULT, then fields are
1683+/* Tab character separating fields. If tab_length is 0, then fields are
1684 separated by the empty string between a non-blank character and a blank
1685 character. */
1686-static int tab = TAB_DEFAULT;
1687+static char tab[MB_LEN_MAX + 1];
1688+static size_t tab_length = 0;
1689
1690 /* Flag to remove consecutive duplicate lines from the output.
1691 Only the last of a sequence of equal lines will be output. */
1692@@ -639,6 +670,44 @@
1693 update_proc (pid);
1694 }
1695
1696+/* Function pointers. */
1697+static void
1698+(*inittables) (void);
1699+static char *
1700+(*begfield) (const struct line*, const struct keyfield *);
1701+static char *
1702+(*limfield) (const struct line*, const struct keyfield *);
1703+static int
1704+(*getmonth) (char const *, size_t);
1705+static int
1706+(*keycompare) (const struct line *, const struct line *);
1707+static int
1708+(*numcompare) (const char *, const char *);
1709+
1710+/* Test whether STR begins with a blank multibyte character.
1711+ Set *LENGTH to the byte length of the character examined. */
1712+#if HAVE_MBRTOWC
1713+static int
1714+ismbblank (const char *str, size_t len, size_t *length)
1715+{
1716+ size_t mblength;
1717+ wchar_t wc;
1718+ mbstate_t state;
1719+
1720+ memset (&state, '\0', sizeof(mbstate_t));
1721+ mblength = mbrtowc (&wc, str, len, &state);
1722+
1723+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1724+ {
1725+ *length = 1;
1726+ return 0;
1727+ }
1728+
1729+ *length = (mblength < 1) ? 1 : mblength;
1730+ return iswblank (wc);
1731+}
1732+#endif
1733+
1734 /* Clean up any remaining temporary files. */
1735
1736 static void
1737@@ -978,7 +1047,7 @@
1738 free (node);
1739 }
1740
1741-#if HAVE_NL_LANGINFO
1742+#if HAVE_LANGINFO_CODESET
1743
1744 static int
1745 struct_month_cmp (const void *m1, const void *m2)
1746@@ -993,7 +1062,7 @@
1747 /* Initialize the character class tables. */
1748
1749 static void
1750-inittables (void)
1751+inittables_uni (void)
1752 {
1753 size_t i;
1754
1755@@ -1005,7 +1074,7 @@
1756 fold_toupper[i] = toupper (i);
1757 }
1758
1759-#if HAVE_NL_LANGINFO
1760+#if HAVE_LANGINFO_CODESET
1761 /* If we're not in the "C" locale, read different names for months. */
1762 if (hard_LC_TIME)
1763 {
1764@@ -1031,6 +1100,64 @@
1765 #endif
1766 }
1767
1768+#if HAVE_MBRTOWC
1769+static void
1770+inittables_mb (void)
1771+{
1772+ int i, j, k, l;
1773+ char *name, *s;
1774+ size_t s_len, mblength;
1775+ char mbc[MB_LEN_MAX];
1776+ wchar_t wc, pwc;
1777+ mbstate_t state_mb, state_wc;
1778+
1779+ for (i = 0; i < MONTHS_PER_YEAR; i++)
1780+ {
1781+ s = (char *) nl_langinfo (ABMON_1 + i);
1782+ s_len = strlen (s);
1783+ monthtab[i].name = name = (char *) xmalloc (s_len + 1);
1784+ monthtab[i].val = i + 1;
1785+
1786+ memset (&state_mb, '\0', sizeof (mbstate_t));
1787+ memset (&state_wc, '\0', sizeof (mbstate_t));
1788+
1789+ for (j = 0; j < s_len;)
1790+ {
1791+ if (!ismbblank (s + j, s_len - j, &mblength))
1792+ break;
1793+ j += mblength;
1794+ }
1795+
1796+ for (k = 0; j < s_len;)
1797+ {
1798+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1799+ assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1800+ if (mblength == 0)
1801+ break;
1802+
1803+ pwc = towupper (wc);
1804+ if (pwc == wc)
1805+ {
1806+ memcpy (mbc, s + j, mblength);
1807+ j += mblength;
1808+ }
1809+ else
1810+ {
1811+ j += mblength;
1812+ mblength = wcrtomb (mbc, pwc, &state_wc);
1813+ assert (mblength != (size_t)0 && mblength != (size_t)-1);
1814+ }
1815+
1816+ for (l = 0; l < mblength; l++)
1817+ name[k++] = mbc[l];
1818+ }
1819+ name[k] = '\0';
1820+ }
1821+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
1822+ sizeof (struct month), struct_month_cmp);
1823+}
1824+#endif
1825+
1826 /* Specify the amount of main memory to use when sorting. */
1827 static void
1828 specify_sort_size (char const *s)
1829@@ -1241,7 +1368,7 @@
1830 by KEY in LINE. */
1831
1832 static char *
1833-begfield (const struct line *line, const struct keyfield *key)
1834+begfield_uni (const struct line *line, const struct keyfield *key)
1835 {
1836 char *ptr = line->text, *lim = ptr + line->length - 1;
1837 size_t sword = key->sword;
1838@@ -1251,10 +1378,10 @@
1839 /* The leading field separator itself is included in a field when -t
1840 is absent. */
1841
1842- if (tab != TAB_DEFAULT)
1843+ if (tab_length)
1844 while (ptr < lim && sword--)
1845 {
1846- while (ptr < lim && *ptr != tab)
1847+ while (ptr < lim && *ptr != tab[0])
1848 ++ptr;
1849 if (ptr < lim)
1850 ++ptr;
1851@@ -1282,11 +1409,70 @@
1852 return ptr;
1853 }
1854
1855+#if HAVE_MBRTOWC
1856+static char *
1857+begfield_mb (const struct line *line, const struct keyfield *key)
1858+{
1859+ int i;
1860+ char *ptr = line->text, *lim = ptr + line->length - 1;
1861+ size_t sword = key->sword;
1862+ size_t schar = key->schar;
1863+ size_t mblength;
1864+ mbstate_t state;
1865+
1866+ memset (&state, '\0', sizeof(mbstate_t));
1867+
1868+ if (tab_length)
1869+ while (ptr < lim && sword--)
1870+ {
1871+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1872+ {
1873+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1874+ ptr += mblength;
1875+ }
1876+ if (ptr < lim)
1877+ {
1878+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1879+ ptr += mblength;
1880+ }
1881+ }
1882+ else
1883+ while (ptr < lim && sword--)
1884+ {
1885+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1886+ ptr += mblength;
1887+ if (ptr < lim)
1888+ {
1889+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1890+ ptr += mblength;
1891+ }
1892+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1893+ ptr += mblength;
1894+ }
1895+
1896+ if (key->skipsblanks)
1897+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1898+ ptr += mblength;
1899+
1900+ for (i = 0; i < schar; i++)
1901+ {
1902+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1903+
1904+ if (ptr + mblength > lim)
1905+ break;
1906+ else
1907+ ptr += mblength;
1908+ }
1909+
1910+ return ptr;
1911+}
1912+#endif
1913+
1914 /* Return the limit of (a pointer to the first character after) the field
1915 in LINE specified by KEY. */
1916
1917 static char *
1918-limfield (const struct line *line, const struct keyfield *key)
1919+limfield_uni (const struct line *line, const struct keyfield *key)
1920 {
1921 char *ptr = line->text, *lim = ptr + line->length - 1;
1922 size_t eword = key->eword, echar = key->echar;
1923@@ -1299,10 +1485,10 @@
1924 `beginning' is the first character following the delimiting TAB.
1925 Otherwise, leave PTR pointing at the first `blank' character after
1926 the preceding field. */
1927- if (tab != TAB_DEFAULT)
1928+ if (tab_length)
1929 while (ptr < lim && eword--)
1930 {
1931- while (ptr < lim && *ptr != tab)
1932+ while (ptr < lim && *ptr != tab[0])
1933 ++ptr;
1934 if (ptr < lim && (eword | echar))
1935 ++ptr;
1936@@ -1348,10 +1534,10 @@
1937 */
1938
1939 /* Make LIM point to the end of (one byte past) the current field. */
1940- if (tab != TAB_DEFAULT)
1941+ if (tab_length)
1942 {
1943 char *newlim;
1944- newlim = memchr (ptr, tab, lim - ptr);
1945+ newlim = memchr (ptr, tab[0], lim - ptr);
1946 if (newlim)
1947 lim = newlim;
1948 }
1949@@ -1384,6 +1570,107 @@
1950 return ptr;
1951 }
1952
1953+#if HAVE_MBRTOWC
1954+static char *
1955+limfield_mb (const struct line *line, const struct keyfield *key)
1956+{
1957+ char *ptr = line->text, *lim = ptr + line->length - 1;
1958+ size_t eword = key->eword, echar = key->echar;
1959+ int i;
1960+ size_t mblength;
1961+ mbstate_t state;
1962+
1963+ memset (&state, '\0', sizeof(mbstate_t));
1964+
1965+ if (tab_length)
1966+ while (ptr < lim && eword--)
1967+ {
1968+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1969+ {
1970+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1971+ ptr += mblength;
1972+ }
1973+ if (ptr < lim && (eword | echar))
1974+ {
1975+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1976+ ptr += mblength;
1977+ }
1978+ }
1979+ else
1980+ while (ptr < lim && eword--)
1981+ {
1982+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1983+ ptr += mblength;
1984+ if (ptr < lim)
1985+ {
1986+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1987+ ptr += mblength;
1988+ }
1989+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1990+ ptr += mblength;
1991+ }
1992+
1993+
1994+# ifdef POSIX_UNSPECIFIED
1995+ /* Make LIM point to the end of (one byte past) the current field. */
1996+ if (tab_length)
1997+ {
1998+ char *newlim, *p;
1999+
2000+ newlim = NULL;
2001+ for (p = ptr; p < lim;)
2002+ {
2003+ if (memcmp (p, tab, tab_length) == 0)
2004+ {
2005+ newlim = p;
2006+ break;
2007+ }
2008+
2009+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2010+ p += mblength;
2011+ }
2012+ }
2013+ else
2014+ {
2015+ char *newlim;
2016+ newlim = ptr;
2017+
2018+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2019+ newlim += mblength;
2020+ if (ptr < lim)
2021+ {
2022+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2023+ ptr += mblength;
2024+ }
2025+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2026+ newlim += mblength;
2027+ lim = newlim;
2028+ }
2029+# endif
2030+
2031+ /* If we're skipping leading blanks, don't start counting characters
2032+ * until after skipping past any leading blanks. */
2033+ if (key->skipsblanks)
2034+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2035+ ptr += mblength;
2036+
2037+ memset (&state, '\0', sizeof(mbstate_t));
2038+
2039+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2040+ for (i = 0; i < echar; i++)
2041+ {
2042+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2043+
2044+ if (ptr + mblength > lim)
2045+ break;
2046+ else
2047+ ptr += mblength;
2048+ }
2049+
2050+ return ptr;
2051+}
2052+#endif
2053+
2054 /* Fill BUF reading from FP, moving buf->left bytes from the end
2055 of buf->buf to the beginning first. If EOF is reached and the
2056 file wasn't terminated by a newline, supply one. Set up BUF's line
2057@@ -1466,8 +1753,24 @@
2058 else
2059 {
2060 if (key->skipsblanks)
2061- while (blanks[to_uchar (*line_start)])
2062- line_start++;
2063+ {
2064+#if HAVE_MBRTOWC
2065+ if (MB_CUR_MAX > 1)
2066+ {
2067+ size_t mblength;
2068+ mbstate_t state;
2069+ memset (&state, '\0', sizeof(mbstate_t));
2070+ while (line_start < line->keylim &&
2071+ ismbblank (line_start,
2072+ line->keylim - line_start,
2073+ &mblength))
2074+ line_start += mblength;
2075+ }
2076+ else
2077+#endif
2078+ while (blanks[to_uchar (*line_start)])
2079+ line_start++;
2080+ }
2081 line->keybeg = line_start;
2082 }
2083 }
2084@@ -1500,7 +1803,7 @@
2085 hideously fast. */
2086
2087 static int
2088-numcompare (const char *a, const char *b)
2089+numcompare_uni (const char *a, const char *b)
2090 {
2091 while (blanks[to_uchar (*a)])
2092 a++;
2093@@ -1510,6 +1813,25 @@
2094 return strnumcmp (a, b, decimal_point, thousands_sep);
2095 }
2096
2097+#if HAVE_MBRTOWC
2098+static int
2099+numcompare_mb (const char *a, const char *b)
2100+{
2101+ size_t mblength, len;
2102+ len = strlen (a); /* okay for UTF-8 */
2103+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2104+ {
2105+ a += mblength;
2106+ len -= mblength;
2107+ }
2108+ len = strlen (b); /* okay for UTF-8 */
2109+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2110+ b += mblength;
2111+
2112+ return strnumcmp (a, b, decimal_point, thousands_sep);
2113+}
2114+#endif /* HAVE_MBRTOWC */
2115+
2116 static int
2117 general_numcompare (const char *sa, const char *sb)
2118 {
2119@@ -1543,7 +1865,7 @@
2120 Return 0 if the name in S is not recognized. */
2121
2122 static int
2123-getmonth (char const *month, size_t len)
2124+getmonth_uni (char const *month, size_t len)
2125 {
2126 size_t lo = 0;
2127 size_t hi = MONTHS_PER_YEAR;
2128@@ -1698,11 +2020,79 @@
2129 return diff;
2130 }
2131
2132+#if HAVE_MBRTOWC
2133+static int
2134+getmonth_mb (const char *s, size_t len)
2135+{
2136+ char *month;
2137+ register size_t i;
2138+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
2139+ char *tmp;
2140+ size_t wclength, mblength;
2141+ const char **pp;
2142+ const wchar_t **wpp;
2143+ wchar_t *month_wcs;
2144+ mbstate_t state;
2145+
2146+ while (len > 0 && ismbblank (s, len, &mblength))
2147+ {
2148+ s += mblength;
2149+ len -= mblength;
2150+ }
2151+
2152+ if (len == 0)
2153+ return 0;
2154+
2155+ month = (char *) alloca (len + 1);
2156+
2157+ tmp = (char *) alloca (len + 1);
2158+ memcpy (tmp, s, len);
2159+ tmp[len] = '\0';
2160+ pp = (const char **)&tmp;
2161+ month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
2162+ memset (&state, '\0', sizeof(mbstate_t));
2163+
2164+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
2165+ assert (wclength != (size_t)-1 && *pp == NULL);
2166+
2167+ for (i = 0; i < wclength; i++)
2168+ {
2169+ month_wcs[i] = towupper(month_wcs[i]);
2170+ if (iswblank (month_wcs[i]))
2171+ {
2172+ month_wcs[i] = L'\0';
2173+ break;
2174+ }
2175+ }
2176+
2177+ wpp = (const wchar_t **)&month_wcs;
2178+
2179+ mblength = wcsrtombs (month, wpp, len + 1, &state);
2180+ assert (mblength != (-1) && *wpp == NULL);
2181+
2182+ do
2183+ {
2184+ int ix = (lo + hi) / 2;
2185+
2186+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
2187+ hi = ix;
2188+ else
2189+ lo = ix;
2190+ }
2191+ while (hi - lo > 1);
2192+
2193+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
2194+ ? monthtab[lo].val : 0);
2195+
2196+ return result;
2197+}
2198+#endif
2199+
2200 /* Compare two lines A and B trying every key in sequence until there
2201 are no more keys or a difference is found. */
2202
2203 static int
2204-keycompare (const struct line *a, const struct line *b)
2205+keycompare_uni (const struct line *a, const struct line *b)
2206 {
2207 struct keyfield const *key = keylist;
2208
2209@@ -1875,6 +2265,179 @@
2210 return key->reverse ? -diff : diff;
2211 }
2212
2213+#if HAVE_MBRTOWC
2214+static int
2215+keycompare_mb (const struct line *a, const struct line *b)
2216+{
2217+ struct keyfield *key = keylist;
2218+
2219+ /* For the first iteration only, the key positions have been
2220+ precomputed for us. */
2221+ char *texta = a->keybeg;
2222+ char *textb = b->keybeg;
2223+ char *lima = a->keylim;
2224+ char *limb = b->keylim;
2225+
2226+ size_t mblength_a, mblength_b;
2227+ wchar_t wc_a, wc_b;
2228+ mbstate_t state_a, state_b;
2229+
2230+ int diff;
2231+
2232+ memset (&state_a, '\0', sizeof(mbstate_t));
2233+ memset (&state_b, '\0', sizeof(mbstate_t));
2234+
2235+ for (;;)
2236+ {
2237+ unsigned char *translate = (unsigned char *) key->translate;
2238+ bool const *ignore = key->ignore;
2239+
2240+ /* Find the lengths. */
2241+ size_t lena = lima <= texta ? 0 : lima - texta;
2242+ size_t lenb = limb <= textb ? 0 : limb - textb;
2243+
2244+ /* Actually compare the fields. */
2245+ if (key->random)
2246+ diff = compare_random (texta, lena, textb, lenb);
2247+ else if (key->numeric | key->general_numeric)
2248+ {
2249+ char savea = *lima, saveb = *limb;
2250+
2251+ *lima = *limb = '\0';
2252+ if (force_general_numcompare)
2253+ diff = general_numcompare (texta, textb);
2254+ else
2255+ diff = ((key->numeric ? numcompare : general_numcompare)
2256+ (texta, textb));
2257+ *lima = savea, *limb = saveb;
2258+ }
2259+ else if (key->month)
2260+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
2261+ else
2262+ {
2263+ if (ignore || translate)
2264+ {
2265+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
2266+ char *copy_b = copy_a + lena + 1;
2267+ size_t new_len_a, new_len_b;
2268+ size_t i, j;
2269+
2270+ /* Ignore and/or translate chars before comparing. */
2271+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
2272+ do \
2273+ { \
2274+ wchar_t uwc; \
2275+ char mbc[MB_LEN_MAX]; \
2276+ mbstate_t state_wc; \
2277+ \
2278+ for (NEW_LEN = i = 0; i < LEN;) \
2279+ { \
2280+ mbstate_t state_bak; \
2281+ \
2282+ state_bak = STATE; \
2283+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
2284+ \
2285+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
2286+ || MBLENGTH == 0) \
2287+ { \
2288+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
2289+ STATE = state_bak; \
2290+ if (!ignore) \
2291+ COPY[NEW_LEN++] = TEXT[i++]; \
2292+ continue; \
2293+ } \
2294+ \
2295+ if (ignore) \
2296+ { \
2297+ if ((ignore == nonprinting && !iswprint (WC)) \
2298+ || (ignore == nondictionary \
2299+ && !iswalnum (WC) && !iswblank (WC))) \
2300+ { \
2301+ i += MBLENGTH; \
2302+ continue; \
2303+ } \
2304+ } \
2305+ \
2306+ if (translate) \
2307+ { \
2308+ \
2309+ uwc = towupper(WC); \
2310+ if (WC == uwc) \
2311+ { \
2312+ memcpy (mbc, TEXT + i, MBLENGTH); \
2313+ i += MBLENGTH; \
2314+ } \
2315+ else \
2316+ { \
2317+ i += MBLENGTH; \
2318+ WC = uwc; \
2319+ memset (&state_wc, '\0', sizeof (mbstate_t)); \
2320+ \
2321+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
2322+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
2323+ } \
2324+ \
2325+ for (j = 0; j < MBLENGTH; j++) \
2326+ COPY[NEW_LEN++] = mbc[j]; \
2327+ } \
2328+ else \
2329+ for (j = 0; j < MBLENGTH; j++) \
2330+ COPY[NEW_LEN++] = TEXT[i++]; \
2331+ } \
2332+ COPY[NEW_LEN] = '\0'; \
2333+ } \
2334+ while (0)
2335+ IGNORE_CHARS (new_len_a, lena, texta, copy_a,
2336+ wc_a, mblength_a, state_a);
2337+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
2338+ wc_b, mblength_b, state_b);
2339+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2340+ }
2341+ else if (lena == 0)
2342+ diff = - NONZERO (lenb);
2343+ else if (lenb == 0)
2344+ goto greater;
2345+ else
2346+ diff = xmemcoll (texta, lena, textb, lenb);
2347+ }
2348+
2349+ if (diff)
2350+ goto not_equal;
2351+
2352+ key = key->next;
2353+ if (! key)
2354+ break;
2355+
2356+ /* Find the beginning and limit of the next field. */
2357+ if (key->eword != -1)
2358+ lima = limfield (a, key), limb = limfield (b, key);
2359+ else
2360+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2361+
2362+ if (key->sword != -1)
2363+ texta = begfield (a, key), textb = begfield (b, key);
2364+ else
2365+ {
2366+ texta = a->text, textb = b->text;
2367+ if (key->skipsblanks)
2368+ {
2369+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
2370+ texta += mblength_a;
2371+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
2372+ textb += mblength_b;
2373+ }
2374+ }
2375+ }
2376+
2377+ return 0;
2378+
2379+greater:
2380+ diff = 1;
2381+not_equal:
2382+ return key->reverse ? -diff : diff;
2383+}
2384+#endif
2385+
2386 /* Compare two lines A and B, returning negative, zero, or positive
2387 depending on whether A compares less than, equal to, or greater than B. */
2388
2389@@ -2744,7 +3305,7 @@
2390 initialize_exit_failure (SORT_FAILURE);
2391
2392 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2393-#if HAVE_NL_LANGINFO
2394+#if HAVE_LANGINFO_CODESET
2395 hard_LC_TIME = hard_locale (LC_TIME);
2396 #endif
2397
2398@@ -2765,6 +3326,27 @@
2399 thousands_sep = -1;
2400 }
2401
2402+#if HAVE_MBRTOWC
2403+ if (MB_CUR_MAX > 1)
2404+ {
2405+ inittables = inittables_mb;
2406+ begfield = begfield_mb;
2407+ limfield = limfield_mb;
2408+ getmonth = getmonth_mb;
2409+ keycompare = keycompare_mb;
2410+ numcompare = numcompare_mb;
2411+ }
2412+ else
2413+#endif
2414+ {
2415+ inittables = inittables_uni;
2416+ begfield = begfield_uni;
2417+ limfield = limfield_uni;
2418+ getmonth = getmonth_uni;
2419+ keycompare = keycompare_uni;
2420+ numcompare = numcompare_uni;
2421+ }
2422+
2423 have_read_stdin = false;
2424 inittables ();
2425
2426@@ -3015,13 +3597,35 @@
2427
2428 case 't':
2429 {
2430- char newtab = optarg[0];
2431- if (! newtab)
2432+ char newtab[MB_LEN_MAX + 1];
2433+ size_t newtab_length = 1;
2434+ strncpy (newtab, optarg, MB_LEN_MAX);
2435+ if (! newtab[0])
2436 error (SORT_FAILURE, 0, _("empty tab"));
2437- if (optarg[1])
2438+#if HAVE_MBRTOWC
2439+ if (MB_CUR_MAX > 1)
2440+ {
2441+ wchar_t wc;
2442+ mbstate_t state;
2443+ size_t i;
2444+
2445+ memset (&state, '\0', sizeof (mbstate_t));
2446+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
2447+ MB_LEN_MAX),
2448+ &state);
2449+ switch (newtab_length)
2450+ {
2451+ case (size_t) -1:
2452+ case (size_t) -2:
2453+ case 0:
2454+ newtab_length = 1;
2455+ }
2456+ }
2457+#endif
2458+ if (newtab_length == 1 && optarg[1])
2459 {
2460 if (STREQ (optarg, "\\0"))
2461- newtab = '\0';
2462+ newtab[0] = '\0';
2463 else
2464 {
2465 /* Provoke with `sort -txx'. Complain about
2466@@ -3032,9 +3636,12 @@
2467 quote (optarg));
2468 }
2469 }
2470- if (tab != TAB_DEFAULT && tab != newtab)
2471+ if (tab_length
2472+ && (tab_length != newtab_length
2473+ || memcmp (tab, newtab, tab_length) != 0))
2474 error (SORT_FAILURE, 0, _("incompatible tabs"));
2475- tab = newtab;
2476+ memcpy (tab, newtab, newtab_length);
2477+ tab_length = newtab_length;
2478 }
2479 break;
2480
2481--- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000
2482+++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000
2483@@ -39,11 +39,28 @@
2484 #include <stdio.h>
2485 #include <getopt.h>
2486 #include <sys/types.h>
2487+
2488+/* Get mbstate_t, mbrtowc(), wcwidth(). */
2489+#if HAVE_WCHAR_H
2490+# include <wchar.h>
2491+#endif
2492+
2493 #include "system.h"
2494 #include "error.h"
2495 #include "quote.h"
2496 #include "xstrndup.h"
2497
2498+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
2499+ installation; work around this configuration error. */
2500+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
2501+# define MB_LEN_MAX 16
2502+#endif
2503+
2504+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2505+#if HAVE_MBRTOWC && defined mbstate_t
2506+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2507+#endif
2508+
2509 /* The official name of this program (e.g., no `g' prefix). */
2510 #define PROGRAM_NAME "unexpand"
2511
2512@@ -110,6 +127,208 @@
2513 {NULL, 0, NULL, 0}
2514 };
2515
2516+static FILE *next_file (FILE *fp);
2517+/* Multibyte counterpart of unexpand (), selected when MB_CUR_MAX > 1: decodes input with mbrtowc () and counts columns with wcwidth (). */
2518+#if HAVE_MBRTOWC
2519+static void
2520+unexpand_multibyte (void)
2521+{
2522+ FILE *fp; /* Input stream. */
2523+ mbstate_t i_state; /* Current shift state of the input stream. */
2524+ mbstate_t i_state_bak; /* Back up the I_STATE. */
2525+ mbstate_t o_state; /* Current shift state of the output stream. */
2526+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2527+ char *bufpos = buf; /* Next read position of BUF; must be valid before the first refill. */
2528+ size_t buflen = 0; /* The length of the byte sequence in buf. */
2529+ wint_t wc; /* A gotten wide character. */
2530+ size_t mblength; /* The byte size of a multibyte character
2531+ which shows as same character as WC. */
2532+
2533+ /* Index in `tab_list' of next tabstop: */
2534+ int tab_index = 0; /* For calculating width of pending tabs. */
2535+ int print_tab_index = 0; /* For printing as many tabs as possible. */
2536+ unsigned int column = 0; /* Column on screen of next char. */
2537+ int next_tab_column; /* Column the next tab stop is on. */
2538+ int convert = 1; /* If nonzero, perform translations. */
2539+ unsigned int pending = 0; /* Pending columns of blanks. */
2540+
2541+ fp = next_file ((FILE *) NULL);
2542+ if (fp == NULL)
2543+ return;
2544+
2545+ memset (&o_state, '\0', sizeof(mbstate_t));
2546+ memset (&i_state, '\0', sizeof(mbstate_t));
2547+
2548+ for (;;)
2549+ {
2550+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
2551+ {
2552+ memmove (buf, bufpos, buflen); /* Keep the unread tail at the front of BUF. */
2553+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
2554+ bufpos = buf;
2555+ }
2556+
2557+ /* Get a wide character. */
2558+ if (buflen < 1)
2559+ {
2560+ mblength = 1;
2561+ wc = WEOF;
2562+ }
2563+ else
2564+ {
2565+ i_state_bak = i_state;
2566+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
2567+ }
2568+
2569+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2570+ {
2571+ i_state = i_state_bak; /* Discard whatever the failed conversion did to the state. */
2572+ wc = L'\0';
2573+ }
2574+
2575+ if (wc == L' ' && convert && column < INT_MAX)
2576+ {
2577+ ++pending;
2578+ ++column;
2579+ }
2580+ else if (wc == L'\t' && convert)
2581+ {
2582+ if (tab_size == 0)
2583+ {
2584+ /* Do not let tab_index == first_free_tab;
2585+ stop when it is 1 less. */
2586+ while (tab_index < first_free_tab - 1
2587+ && column >= tab_list[tab_index])
2588+ tab_index++;
2589+ next_tab_column = tab_list[tab_index];
2590+ if (tab_index < first_free_tab - 1)
2591+ tab_index++;
2592+ if (column >= next_tab_column)
2593+ {
2594+ convert = 0; /* Ran out of tab stops. */
2595+ goto flush_pend_mb;
2596+ }
2597+ }
2598+ else
2599+ {
2600+ next_tab_column = column + tab_size - column % tab_size;
2601+ }
2602+ pending += next_tab_column - column;
2603+ column = next_tab_column;
2604+ }
2605+ else
2606+ {
2607+flush_pend_mb:
2608+ /* Flush pending spaces. Print as many tabs as possible,
2609+ then print the rest as spaces. */
2610+ if (pending == 1)
2611+ {
2612+ putchar (' ');
2613+ pending = 0;
2614+ }
2615+ column -= pending;
2616+ while (pending > 0)
2617+ {
2618+ if (tab_size == 0)
2619+ {
2620+ /* Do not let print_tab_index == first_free_tab;
2621+ stop when it is 1 less. */
2622+ while (print_tab_index < first_free_tab - 1
2623+ && column >= tab_list[print_tab_index])
2624+ print_tab_index++;
2625+ next_tab_column = tab_list[print_tab_index];
2626+ if (print_tab_index < first_free_tab - 1)
2627+ print_tab_index++;
2628+ }
2629+ else
2630+ {
2631+ next_tab_column =
2632+ column + tab_size - column % tab_size;
2633+ }
2634+ if (next_tab_column - column <= pending)
2635+ {
2636+ putchar ('\t');
2637+ pending -= next_tab_column - column;
2638+ column = next_tab_column;
2639+ }
2640+ else
2641+ {
2642+ --print_tab_index;
2643+ column += pending;
2644+ while (pending != 0)
2645+ {
2646+ putchar (' ');
2647+ pending--;
2648+ }
2649+ }
2650+ }
2651+
2652+ if (wc == WEOF)
2653+ {
2654+ fp = next_file (fp);
2655+ if (fp == NULL)
2656+ break; /* No more files. */
2657+ else
2658+ {
2659+ memset (&i_state, '\0', sizeof(mbstate_t));
2660+ continue;
2661+ }
2662+ }
2663+
2664+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2665+ {
2666+ if (convert)
2667+ {
2668+ ++column;
2669+ if (convert_entire_line == 0)
2670+ convert = 0;
2671+ }
2672+ mblength = 1;
2673+ putchar (*bufpos); /* Echo the undecodable byte at the read position, not BUF's first byte. */
2674+ }
2675+ else if (mblength == 0)
2676+ {
2677+ if (convert && convert_entire_line == 0)
2678+ convert = 0;
2679+ mblength = 1;
2680+ putchar ('\0');
2681+ }
2682+ else
2683+ {
2684+ if (convert)
2685+ {
2686+ if (wc == L'\b')
2687+ {
2688+ if (column > 0)
2689+ --column;
2690+ }
2691+ else
2692+ {
2693+ int width; /* The width of WC. */
2694+
2695+ width = wcwidth (wc);
2696+ column += (width > 0) ? width : 0;
2697+ if (convert_entire_line == 0)
2698+ convert = 0;
2699+ }
2700+ }
2701+
2702+ if (wc == L'\n')
2703+ {
2704+ tab_index = print_tab_index = 0;
2705+ column = pending = 0;
2706+ convert = 1;
2707+ }
2708+ fwrite (bufpos, sizeof(char), mblength, stdout);
2709+ }
2710+ }
2711+ buflen -= mblength;
2712+ bufpos += mblength;
2713+ }
2714+}
2715+#endif
2716+
2717+
2718 void
2719 usage (int status)
2720 {
2721@@ -531,7 +750,12 @@
2722
2723 file_list = (optind < argc ? &argv[optind] : stdin_argv);
2724
2725- unexpand ();
2726+#if HAVE_MBRTOWC
2727+ if (MB_CUR_MAX > 1)
2728+ unexpand_multibyte ();
2729+ else
2730+#endif
2731+ unexpand ();
2732
2733 if (have_read_stdin && fclose (stdin) != 0)
2734 error (EXIT_FAILURE, errno, "-");
2735--- coreutils-6.8+/src/pr.c.i18n 2007-01-14 15:41:28.000000000 +0000
2736+++ coreutils-6.8+/src/pr.c 2007-03-01 15:08:24.000000000 +0000
2737@@ -313,6 +313,32 @@
2738
2739 #include <getopt.h>
2740 #include <sys/types.h>
2741+
2742+/* Get MB_LEN_MAX. */
2743+#include <limits.h>
2744+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
2745+ installation; work around this configuration error. */
2746+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
2747+# define MB_LEN_MAX 16
2748+#endif
2749+
2750+/* Get MB_CUR_MAX. */
2751+#include <stdlib.h>
2752+
2753+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
2754+/* Get mbstate_t, mbrtowc(), wcwidth(). */
2755+#if HAVE_WCHAR_H
2756+# include <wchar.h>
2757+#endif
2758+
2759+/* Get iswprint(). -- for wcwidth(). */
2760+#if HAVE_WCTYPE_H
2761+# include <wctype.h>
2762+#endif
2763+#if !defined iswprint && !HAVE_ISWPRINT
2764+# define iswprint(wc) 1
2765+#endif
2766+
2767 #include "system.h"
2768 #include "error.h"
2769 #include "hard-locale.h"
2770@@ -324,6 +350,18 @@
2771 #include "strftime.h"
2772 #include "xstrtol.h"
2773
2774+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2775+#if HAVE_MBRTOWC && defined mbstate_t
2776+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2777+#endif
2778+
2779+#ifndef HAVE_DECL_WCWIDTH
2780+"this configure-time declaration test was not run"
2781+#endif
2782+#if !HAVE_DECL_WCWIDTH
2783+extern int wcwidth ();
2784+#endif
2785+
2786 /* The official name of this program (e.g., no `g' prefix). */
2787 #define PROGRAM_NAME "pr"
2788
2789@@ -416,7 +454,20 @@
2790
2791 #define NULLCOL (COLUMN *)0
2792
2793-static int char_to_clump (char c);
2794+/* Function pointers that select between the single-byte-locale and
2795+ multibyte-locale implementations. If multibyte functions do not exist on
2796+ your system, these pointers always point to the single-byte versions. */
2797+static void (*print_char) (char c);
2798+static int (*char_to_clump) (char c);
2799+
2800+/* Functions for single byte locale. */
2801+static void print_char_single (char c);
2802+static int char_to_clump_single (char c);
2803+
2804+/* Functions for multibyte locale. */
2805+static void print_char_multi (char c);
2806+static int char_to_clump_multi (char c);
2807+
2808 static bool read_line (COLUMN *p);
2809 static bool print_page (void);
2810 static bool print_stored (COLUMN *p);
2811@@ -426,6 +477,7 @@
2812 static void pad_across_to (int position);
2813 static void add_line_number (COLUMN *p);
2814 static void getoptarg (char *arg, char switch_char, char *character,
2815+ int *character_length, int *character_width,
2816 int *number);
2817 void usage (int status);
2818 static void print_files (int number_of_files, char **av);
2819@@ -440,7 +492,6 @@
2820 static void pad_down (int lines);
2821 static void read_rest_of_line (COLUMN *p);
2822 static void skip_read (COLUMN *p, int column_number);
2823-static void print_char (char c);
2824 static void cleanup (void);
2825 static void print_sep_string (void);
2826 static void separator_string (const char *optarg_S);
2827@@ -455,7 +506,7 @@
2828 we store the leftmost columns contiguously in buff.
2829 To print a line from buff, get the index of the first character
2830 from line_vector[i], and print up to line_vector[i + 1]. */
2831-static char *buff;
2832+static unsigned char *buff;
2833
2834 /* Index of the position in buff where the next character
2835 will be stored. */
2836@@ -559,7 +610,7 @@
2837 static bool untabify_input = false;
2838
2839 /* (-e) The input tab character. */
2840-static char input_tab_char = '\t';
2841+static char input_tab_char[MB_LEN_MAX] = "\t";
2842
2843 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
2844 where the leftmost column is 1. */
2845@@ -569,7 +620,10 @@
2846 static bool tabify_output = false;
2847
2848 /* (-i) The output tab character. */
2849-static char output_tab_char = '\t';
2850+static char output_tab_char[MB_LEN_MAX] = "\t";
2851+
2852+/* (-i) The byte length of output tab character. */
2853+static int output_tab_char_length = 1;
2854
2855 /* (-i) The width of the output tab. */
2856 static int chars_per_output_tab = 8;
2857@@ -643,7 +697,13 @@
2858 static bool numbered_lines = false;
2859
2860 /* (-n) Character which follows each line number. */
2861-static char number_separator = '\t';
2862+static char number_separator[MB_LEN_MAX] = "\t";
2863+
2864+/* (-n) The byte length of the character which follows each line number. */
2865+static int number_separator_length = 1;
2866+
2867+/* (-n) The character width of the character which follows each line number. */
2868+static int number_separator_width = 0;
2869
2870 /* (-n) line counting starts with 1st line of input file (not with 1st
2871 line of 1st page printed). */
2872@@ -696,6 +756,7 @@
2873 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
2874 static char *col_sep_string = "";
2875 static int col_sep_length = 0;
2876+static int col_sep_width = 0;
2877 static char *column_separator = " ";
2878 static char *line_separator = "\t";
2879
2880@@ -852,6 +913,13 @@
2881 col_sep_length = (int) strlen (optarg_S);
2882 col_sep_string = xmalloc (col_sep_length + 1);
2883 strcpy (col_sep_string, optarg_S);
2884+
2885+#if HAVE_MBRTOWC
2886+ if (MB_CUR_MAX > 1)
2887+ col_sep_width = mbswidth (col_sep_string, 0);
2888+ else
2889+#endif
2890+ col_sep_width = col_sep_length;
2891 }
2892
2893 int
2894@@ -877,6 +945,21 @@
2895
2896 atexit (close_stdout);
2897
2898+/* Define which functions are used, the ones for single byte locale or the ones
2899+ for multibyte locale. */
2900+#if HAVE_MBRTOWC
2901+ if (MB_CUR_MAX > 1)
2902+ {
2903+ print_char = print_char_multi;
2904+ char_to_clump = char_to_clump_multi;
2905+ }
2906+ else
2907+#endif
2908+ {
2909+ print_char = print_char_single;
2910+ char_to_clump = char_to_clump_single;
2911+ }
2912+
2913 n_files = 0;
2914 file_names = (argc > 1
2915 ? xmalloc ((argc - 1) * sizeof (char *))
2916@@ -949,8 +1032,12 @@
2917 break;
2918 case 'e':
2919 if (optarg)
2920- getoptarg (optarg, 'e', &input_tab_char,
2921- &chars_per_input_tab);
2922+ {
2923+ int dummy_length, dummy_width;
2924+
2925+ getoptarg (optarg, 'e', input_tab_char, &dummy_length,
2926+ &dummy_width, &chars_per_input_tab);
2927+ }
2928 /* Could check tab width > 0. */
2929 untabify_input = true;
2930 break;
2931@@ -963,8 +1050,12 @@
2932 break;
2933 case 'i':
2934 if (optarg)
2935- getoptarg (optarg, 'i', &output_tab_char,
2936- &chars_per_output_tab);
2937+ {
2938+ int dummy_width;
2939+
2940+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
2941+ &dummy_width, &chars_per_output_tab);
2942+ }
2943 /* Could check tab width > 0. */
2944 tabify_output = true;
2945 break;
2946@@ -991,8 +1082,8 @@
2947 case 'n':
2948 numbered_lines = true;
2949 if (optarg)
2950- getoptarg (optarg, 'n', &number_separator,
2951- &chars_per_number);
2952+ getoptarg (optarg, 'n', number_separator, &number_separator_length,
2953+ &number_separator_width, &chars_per_number);
2954 break;
2955 case 'N':
2956 skip_count = false;
2957@@ -1031,7 +1122,7 @@
2958 old_s = false;
2959 /* Reset an additional input of -s, -S dominates -s */
2960 col_sep_string = "";
2961- col_sep_length = 0;
2962+ col_sep_length = col_sep_width = 0;
2963 use_col_separator = true;
2964 if (optarg)
2965 separator_string (optarg);
2966@@ -1188,10 +1279,45 @@
2967 a number. */
2968
2969 static void
2970-getoptarg (char *arg, char switch_char, char *character, int *number)
2971+getoptarg (char *arg, char switch_char, char *character, int *character_length,
2972+ int *character_width, int *number)
2973 {
2974 if (!ISDIGIT (*arg))
2975- *character = *arg++;
2976+ {
2977+#ifdef HAVE_MBRTOWC
2978+ if (MB_CUR_MAX > 1) /* for multibyte locale. */
2979+ {
2980+ wchar_t wc;
2981+ size_t mblength;
2982+ int width;
2983+ mbstate_t state = {'\0'};
2984+
2985+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
2986+
2987+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2988+ {
2989+ *character_length = 1;
2990+ *character_width = 1;
2991+ }
2992+ else
2993+ {
2994+ *character_length = (mblength < 1) ? 1 : mblength;
2995+ width = wcwidth (wc);
2996+ *character_width = (width < 0) ? 0 : width;
2997+ }
2998+
2999+ strncpy (character, arg, *character_length);
3000+ arg += *character_length;
3001+ }
3002+ else /* for single byte locale. */
3003+#endif
3004+ {
3005+ *character = *arg++;
3006+ *character_length = 1;
3007+ *character_width = 1;
3008+ }
3009+ }
3010+
3011 if (*arg)
3012 {
3013 long int tmp_long;
3014@@ -1256,7 +1382,7 @@
3015 else
3016 col_sep_string = column_separator;
3017
3018- col_sep_length = 1;
3019+ col_sep_length = col_sep_width = 1;
3020 use_col_separator = true;
3021 }
3022 /* It's rather pointless to define a TAB separator with column
3023@@ -1288,11 +1414,11 @@
3024 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
3025
3026 /* Estimate chars_per_text without any margin and keep it constant. */
3027- if (number_separator == '\t')
3028+ if (number_separator[0] == '\t')
3029 number_width = chars_per_number +
3030 TAB_WIDTH (chars_per_default_tab, chars_per_number);
3031 else
3032- number_width = chars_per_number + 1;
3033+ number_width = chars_per_number + number_separator_width;
3034
3035 /* The number is part of the column width unless we are
3036 printing files in parallel. */
3037@@ -1307,7 +1433,7 @@
3038 }
3039
3040 chars_per_column = (chars_per_line - chars_used_by_number -
3041- (columns - 1) * col_sep_length) / columns;
3042+ (columns - 1) * col_sep_width) / columns;
3043
3044 if (chars_per_column < 1)
3045 error (EXIT_FAILURE, 0, _("page width too narrow"));
3046@@ -1432,7 +1558,7 @@
3047
3048 /* Enlarge p->start_position of first column to use the same form of
3049 padding_not_printed with all columns. */
3050- h = h + col_sep_length;
3051+ h = h + col_sep_width;
3052
3053 /* This loop takes care of all but the rightmost column. */
3054
3055@@ -1466,7 +1592,7 @@
3056 }
3057 else
3058 {
3059- h = h_next + col_sep_length;
3060+ h = h_next + col_sep_width;
3061 h_next = h + chars_per_column;
3062 }
3063 }
3064@@ -1756,9 +1882,9 @@
3065 align_column (COLUMN *p)
3066 {
3067 padding_not_printed = p->start_position;
3068- if (padding_not_printed - col_sep_length > 0)
3069+ if (padding_not_printed - col_sep_width > 0)
3070 {
3071- pad_across_to (padding_not_printed - col_sep_length);
3072+ pad_across_to (padding_not_printed - col_sep_width);
3073 padding_not_printed = ANYWHERE;
3074 }
3075
3076@@ -2029,13 +2155,13 @@
3077 /* May be too generous. */
3078 buff = X2REALLOC (buff, &buff_allocated);
3079 }
3080- buff[buff_current++] = c;
3081+ buff[buff_current++] = (unsigned char) c;
3082 }
3083
3084 static void
3085 add_line_number (COLUMN *p)
3086 {
3087- int i;
3088+ int i, j;
3089 char *s;
3090 int left_cut;
3091
3092@@ -2058,22 +2184,24 @@
3093 /* Tabification is assumed for multiple columns, also for n-separators,
3094 but `default n-separator = TAB' hasn't been given priority over
3095 equal column_width also specified by POSIX. */
3096- if (number_separator == '\t')
3097+ if (number_separator[0] == '\t')
3098 {
3099 i = number_width - chars_per_number;
3100 while (i-- > 0)
3101 (p->char_func) (' ');
3102 }
3103 else
3104- (p->char_func) (number_separator);
3105+ for (j = 0; j < number_separator_length; j++)
3106+ (p->char_func) (number_separator[j]);
3107 }
3108 else
3109 /* To comply with POSIX, we avoid any expansion of default TAB
3110 separator with a single column output. No column_width requirement
3111 has to be considered. */
3112 {
3113- (p->char_func) (number_separator);
3114- if (number_separator == '\t')
3115+ for (j = 0; j < number_separator_length; j++)
3116+ (p->char_func) (number_separator[j]);
3117+ if (number_separator[0] == '\t')
3118 output_position = POS_AFTER_TAB (chars_per_output_tab,
3119 output_position);
3120 }
3121@@ -2234,7 +2362,7 @@
3122 while (goal - h_old > 1
3123 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
3124 {
3125- putchar (output_tab_char);
3126+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
3127 h_old = h_new;
3128 }
3129 while (++h_old <= goal)
3130@@ -2254,6 +2382,7 @@
3131 {
3132 char *s;
3133 int l = col_sep_length;
3134+ int not_space_flag;
3135
3136 s = col_sep_string;
3137
3138@@ -2267,6 +2396,7 @@
3139 {
3140 for (; separators_not_printed > 0; --separators_not_printed)
3141 {
3142+ not_space_flag = 0;
3143 while (l-- > 0)
3144 {
3145 /* 3 types of sep_strings: spaces only, spaces and chars,
3146@@ -2280,12 +2410,15 @@
3147 }
3148 else
3149 {
3150+ not_space_flag = 1;
3151 if (spaces_not_printed > 0)
3152 print_white_space ();
3153 putchar (*s++);
3154- ++output_position;
3155 }
3156 }
3157+ if (not_space_flag)
3158+ output_position += col_sep_width;
3159+
3160 /* sep_string ends with some spaces */
3161 if (spaces_not_printed > 0)
3162 print_white_space ();
3163@@ -2313,7 +2446,7 @@
3164 required number of tabs and spaces. */
3165
3166 static void
3167-print_char (char c)
3168+print_char_single (char c)
3169 {
3170 if (tabify_output)
3171 {
3172@@ -2337,6 +2470,74 @@
3173 putchar (c);
3174 }
3175
3176+#ifdef HAVE_MBRTOWC
3177+static void
3178+print_char_multi (char c)
3179+{
3180+ static size_t mbc_pos = 0;
3181+ static char mbc[MB_LEN_MAX] = {'\0'};
3182+ static mbstate_t state = {'\0'};
3183+ mbstate_t state_bak;
3184+ wchar_t wc;
3185+ size_t mblength;
3186+ int width;
3187+
3188+ if (tabify_output)
3189+ {
3190+ state_bak = state;
3191+ mbc[mbc_pos++] = c;
3192+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
3193+
3194+ while (mbc_pos > 0)
3195+ {
3196+ switch (mblength)
3197+ {
3198+ case (size_t)-2:
3199+ state = state_bak;
3200+ return;
3201+
3202+ case (size_t)-1:
3203+ state = state_bak;
3204+ ++output_position;
3205+ putchar (mbc[0]);
3206+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
3207+ --mbc_pos;
3208+ break;
3209+
3210+ case 0:
3211+ mblength = 1;
3212+
3213+ default:
3214+ if (wc == L' ')
3215+ {
3216+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3217+ --mbc_pos;
3218+ ++spaces_not_printed;
3219+ return;
3220+ }
3221+ else if (spaces_not_printed > 0)
3222+ print_white_space ();
3223+
3224+ /* Nonprintables are assumed to have width 0, except L'\b'. */
3225+ if ((width = wcwidth (wc)) < 1)
3226+ {
3227+ if (wc == L'\b')
3228+ --output_position;
3229+ }
3230+ else
3231+ output_position += width;
3232+
3233+ fwrite (mbc, sizeof(char), mblength, stdout);
3234+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3235+ mbc_pos -= mblength;
3236+ }
3237+ }
3238+ return;
3239+ }
3240+ putchar (c);
3241+}
3242+#endif
3243+
3244 /* Skip to page PAGE before printing.
3245 PAGE may be larger than total number of pages. */
3246
3247@@ -2517,9 +2718,9 @@
3248 align_empty_cols = false;
3249 }
3250
3251- if (padding_not_printed - col_sep_length > 0)
3252+ if (padding_not_printed - col_sep_width > 0)
3253 {
3254- pad_across_to (padding_not_printed - col_sep_length);
3255+ pad_across_to (padding_not_printed - col_sep_width);
3256 padding_not_printed = ANYWHERE;
3257 }
3258
3259@@ -2620,9 +2821,9 @@
3260 }
3261 }
3262
3263- if (padding_not_printed - col_sep_length > 0)
3264+ if (padding_not_printed - col_sep_width > 0)
3265 {
3266- pad_across_to (padding_not_printed - col_sep_length);
3267+ pad_across_to (padding_not_printed - col_sep_width);
3268 padding_not_printed = ANYWHERE;
3269 }
3270
3271@@ -2635,8 +2836,8 @@
3272 if (spaces_not_printed == 0)
3273 {
3274 output_position = p->start_position + end_vector[line];
3275- if (p->start_position - col_sep_length == chars_per_margin)
3276- output_position -= col_sep_length;
3277+ if (p->start_position - col_sep_width == chars_per_margin)
3278+ output_position -= col_sep_width;
3279 }
3280
3281 return true;
3282@@ -2655,7 +2856,7 @@
3283 number of characters is 1.) */
3284
3285 static int
3286-char_to_clump (char c)
3287+char_to_clump_single (char c)
3288 {
3289 unsigned char uc = c;
3290 char *s = clump_buff;
3291@@ -2665,10 +2866,10 @@
3292 int chars;
3293 int chars_per_c = 8;
3294
3295- if (c == input_tab_char)
3296+ if (c == input_tab_char[0])
3297 chars_per_c = chars_per_input_tab;
3298
3299- if (c == input_tab_char || c == '\t')
3300+ if (c == input_tab_char[0] || c == '\t')
3301 {
3302 width = TAB_WIDTH (chars_per_c, input_position);
3303
3304@@ -2739,6 +2940,154 @@
3305 return chars;
3306 }
3307
2308+#ifdef HAVE_MBRTOWC
2309+static int
2310+char_to_clump_multi (char c)
2311+{
2312+ static size_t mbc_pos = 0;
2313+ static char mbc[MB_LEN_MAX] = {'\0'};
2314+ static mbstate_t state = {'\0'};
2315+ mbstate_t state_bak;
2316+ wchar_t wc;
2317+ size_t mblength;
2318+ int wc_width;
2319+ register char *s = clump_buff;
2320+ register int i, j;
2321+ char esc_buff[4]; /* Three octal digits plus the terminating NUL. */
2322+ int width;
2323+ int chars;
2324+ int chars_per_c = 8;
2325+ /* Append byte C to the pending bytes in MBC; once they decode (or prove invalid), write the clump to clump_buff, add its width to input_position and return its byte count. Returns 0 while the character is still incomplete. */
2326+ state_bak = state;
2327+ mbc[mbc_pos++] = c;
2328+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2329+
2330+ width = 0;
2331+ chars = 0;
2332+ while (mbc_pos > 0)
2333+ {
2334+ switch (mblength)
2335+ {
2336+ case (size_t)-2:
2337+ state = state_bak;
2338+ return 0;
2339+
2340+ case (size_t)-1:
2341+ state = state_bak;
2342+ mblength = 1; /* NOTE(review): later loop passes reuse this length and a WC the failed conversion never set. */
2343+
2344+ if (use_esc_sequence || use_cntrl_prefix)
2345+ {
2346+ width += 4; /* Accumulate, like every other branch of this loop. */
2347+ chars += 4;
2348+ *s++ = '\\';
2349+ sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* Cast keeps a high byte to 3 digits. */
2350+ for (i = 0; i <= 2; ++i)
2351+ *s++ = (int) esc_buff[i];
2352+ }
2353+ else
2354+ {
2355+ width += 1;
2356+ chars += 1;
2357+ *s++ = mbc[0];
2358+ }
2359+ break;
2360+
2361+ case 0:
2362+ mblength = 1;
2363+ /* Fall through */
2364+
2365+ default:
2366+ if (memcmp (mbc, input_tab_char, mblength) == 0)
2367+ chars_per_c = chars_per_input_tab;
2368+
2369+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2370+ {
2371+ int width_inc;
2372+
2373+ width_inc = TAB_WIDTH (chars_per_c, input_position);
2374+ width += width_inc;
2375+
2376+ if (untabify_input)
2377+ {
2378+ for (i = width_inc; i; --i)
2379+ *s++ = ' ';
2380+ chars += width_inc;
2381+ }
2382+ else
2383+ {
2384+ for (i = 0; i < mblength; i++)
2385+ *s++ = mbc[i];
2386+ chars += mblength;
2387+ }
2388+ }
2389+ else if ((wc_width = wcwidth (wc)) < 1)
2390+ {
2391+ if (use_esc_sequence)
2392+ {
2393+ for (i = 0; i < mblength; i++)
2394+ {
2395+ width += 4;
2396+ chars += 4;
2397+ *s++ = '\\';
2398+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character, not C repeatedly. */
2399+ for (j = 0; j <= 2; ++j)
2400+ *s++ = (int) esc_buff[j];
2401+ }
2402+ }
2403+ else if (use_cntrl_prefix)
2404+ {
2405+ if (wc < 0200)
2406+ {
2407+ width += 2;
2408+ chars += 2;
2409+ *s++ = '^';
2410+ *s++ = wc ^ 0100;
2411+ }
2412+ else
2413+ {
2414+ for (i = 0; i < mblength; i++)
2415+ {
2416+ width += 4;
2417+ chars += 4;
2418+ *s++ = '\\';
2419+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character, not C repeatedly. */
2420+ for (j = 0; j <= 2; ++j)
2421+ *s++ = (int) esc_buff[j];
2422+ }
2423+ }
2424+ }
2425+ else if (wc == L'\b')
2426+ {
2427+ width += -1;
2428+ chars += 1;
2429+ *s++ = c;
2430+ }
2431+ else
2432+ {
2433+ width += 0;
2434+ chars += mblength;
2435+ for (i = 0; i < mblength; i++)
2436+ *s++ = mbc[i];
2437+ }
2438+ }
2439+ else
2440+ {
2441+ width += wc_width;
2442+ chars += mblength;
2443+ for (i = 0; i < mblength; i++)
2444+ *s++ = mbc[i];
2445+ }
2446+ }
2447+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2448+ mbc_pos -= mblength;
2449+ }
2450+
2451+ input_position += width;
2452+ return chars;
2453+}
2454+#endif
3455+
3456 /* We've just printed some files and need to clean up things before
3457 looking for more options and printing the next batch of files.
3458
3459--- coreutils-6.8+/src/cut.c.i18n 2007-01-14 15:41:28.000000000 +0000
3460+++ coreutils-6.8+/src/cut.c 2007-03-01 15:08:24.000000000 +0000
3461@@ -29,6 +29,11 @@
3462 #include <assert.h>
3463 #include <getopt.h>
3464 #include <sys/types.h>
3465+
3466+/* Get mbstate_t, mbrtowc(). */
3467+#if HAVE_WCHAR_H
3468+# include <wchar.h>
3469+#endif
3470 #include "system.h"
3471
3472 #include "error.h"
3473@@ -37,6 +42,18 @@
3474 #include "quote.h"
3475 #include "xstrndup.h"
3476
3477+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3478+ installation; work around this configuration error. */
3479+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3480+# undef MB_LEN_MAX
3481+# define MB_LEN_MAX 16
3482+#endif
3483+
3484+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3485+#if HAVE_MBRTOWC && defined mbstate_t
3486+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3487+#endif
3488+
3489 /* The official name of this program (e.g., no `g' prefix). */
3490 #define PROGRAM_NAME "cut"
3491
3492@@ -67,6 +84,52 @@
3493 } \
3494 while (0)
3495
3496+/* Refill BUF so a whole multibyte character is available: when fewer than MB_LEN_MAX bytes remain and STREAM has neither reached EOF nor failed, move the BUFLEN unread bytes at BUFPOS to the front of BUF, append up to BUFSIZ more from STREAM, and reset BUFPOS to BUF. */
3497+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
3498+ do \
3499+ { \
3500+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
3501+ { \
3502+ memmove (BUF, BUFPOS, BUFLEN); \
3503+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
3504+ BUFPOS = BUF; \
3505+ } \
3506+ } \
3507+ while (0)
3508+
3509+/* Decode the character at BUFPOS into WC and store its byte length in MBLENGTH; BUFPOS itself is not advanced. If BUFLEN < 1, only WC is set (to WEOF).
3510+ CONVFAIL is set to 1 if the bytes are invalid or incomplete (STATE is then restored and MBLENGTH is 1), otherwise to 0. */
3511+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
3512+ do \
3513+ { \
3514+ mbstate_t state_bak; \
3515+ \
3516+ if (BUFLEN < 1) \
3517+ { \
3518+ WC = WEOF; \
3519+ break; \
3520+ } \
3521+ \
3522+ /* Get a wide character. */ \
3523+ CONVFAIL = 0; \
3524+ state_bak = STATE; \
3525+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
3526+ \
3527+ switch (MBLENGTH) \
3528+ { \
3529+ case (size_t)-1: \
3530+ case (size_t)-2: \
3531+ CONVFAIL++; \
3532+ STATE = state_bak; \
3533+ /* Fall through. */ \
3534+ \
3535+ case 0: \
3536+ MBLENGTH = 1; \
3537+ break; \
3538+ } \
3539+ } \
3540+ while (0)
3541+
3542 struct range_pair
3543 {
3544 size_t lo;
3545@@ -85,7 +148,7 @@
3546 /* The number of bytes allocated for FIELD_1_BUFFER. */
3547 static size_t field_1_bufsize;
3548
3549-/* The largest field or byte index used as an endpoint of a closed
3550+/* The largest byte, character or field index used as an endpoint of a closed
3551 or degenerate range specification; this doesn't include the starting
3552 index of right-open-ended ranges. For example, with either range spec
3553 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
3554@@ -97,10 +160,11 @@
3555
3556 /* This is a bit vector.
3557 In byte mode, which bytes to output.
3558+ In character mode, which characters to output.
3559 In field mode, which DELIM-separated fields to output.
3560- Both bytes and fields are numbered starting with 1,
3561+ Bytes, characters and fields are numbered starting with 1,
3562 so the zeroth bit of this array is unused.
3563- A field or byte K has been selected if
3564+ A byte, character or field K has been selected if
3565 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
3566 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
3567 static unsigned char *printable_field;
3568@@ -109,9 +173,12 @@
3569 {
3570 undefined_mode,
3571
3572- /* Output characters that are in the given bytes. */
3573+ /* Output bytes that are at the given positions. */
3574 byte_mode,
3575
3576+ /* Output characters that are at the given positions. */
3577+ character_mode,
3578+
3579 /* Output the given delimeter-separated fields. */
3580 field_mode
3581 };
3582@@ -121,6 +188,13 @@
3583
3584 static enum operating_mode operating_mode;
3585
3586+/* If nonzero, when in byte mode, don't split multibyte characters. */
3587+static int byte_mode_character_aware;
3588+
3589+/* If nonzero, use the single-byte-locale functions even when
3590+ this program runs in a multibyte locale. */
3591+static int force_singlebyte_mode;
3592+
3593 /* If true do not output lines containing no delimeter characters.
3594 Otherwise, all such lines are printed. This option is valid only
3595 with field mode. */
3596@@ -132,6 +206,9 @@
3597
3598 /* The delimeter character for field mode. */
3599 static unsigned char delim;
3600+#if HAVE_WCHAR_H
3601+static wchar_t wcdelim;
3602+#endif
3603
3604 /* True if the --output-delimiter=STRING option was specified. */
3605 static bool output_delimiter_specified;
3606@@ -205,7 +282,7 @@
3607 -f, --fields=LIST select only these fields; also print any line\n\
3608 that contains no delimiter character, unless\n\
3609 the -s option is specified\n\
3610- -n (ignored)\n\
3611+ -n with -b: don't split multibyte characters\n\
3612 "), stdout);
3613 fputs (_("\
3614 --complement complement the set of selected bytes, characters\n\
3615@@ -362,7 +439,7 @@
3616 in_digits = false;
3617 /* Starting a range. */
3618 if (dash_found)
3619- FATAL_ERROR (_("invalid byte or field list"));
3620+ FATAL_ERROR (_("invalid byte, character or field list"));
3621 dash_found = true;
3622 fieldstr++;
3623
3624@@ -387,14 +464,16 @@
3625 if (value == 0)
3626 {
3627 /* `n-'. From `initial' to end of line. */
3628- eol_range_start = initial;
3629+ if (eol_range_start == 0 ||
3630+ (eol_range_start != 0 && eol_range_start > initial))
3631+ eol_range_start = initial;
3632 field_found = true;
3633 }
3634 else
3635 {
3636 /* `m-n' or `-n' (1-n). */
3637 if (value < initial)
3638- FATAL_ERROR (_("invalid byte or field list"));
3639+ FATAL_ERROR (_("invalid byte, character or field list"));
3640
3641 /* Is there already a range going to end of line? */
3642 if (eol_range_start != 0)
3643@@ -467,6 +546,9 @@
3644 if (operating_mode == byte_mode)
3645 error (0, 0,
3646 _("byte offset %s is too large"), quote (bad_num));
3647+ else if (operating_mode == character_mode)
3648+ error (0, 0,
3649+ _("character offset %s is too large"), quote (bad_num));
3650 else
3651 error (0, 0,
3652 _("field number %s is too large"), quote (bad_num));
3653@@ -477,7 +559,7 @@
3654 fieldstr++;
3655 }
3656 else
3657- FATAL_ERROR (_("invalid byte or field list"));
3658+ FATAL_ERROR (_("invalid byte, character or field list"));
3659 }
3660
3661 max_range_endpoint = 0;
3662@@ -570,6 +652,63 @@
3663 }
3664 }
3665
3666+#if HAVE_MBRTOWC
3667+/* Multibyte-aware worker shared by two modes of operation:
3668+
3669+ 1. Character mode: read from STREAM and print to standard output the
3670+ selected characters; the line index advances by one per character.
3671+
3672+ 2. Byte mode: read from STREAM and print the selected bytes without
3673+ splitting multibyte characters; the index advances by the byte length. */
3674+
3675+static void
3676+cut_characters_or_cut_bytes_no_split (FILE *stream)
3677+{
3678+ int idx; /* Bytes (byte mode) or characters consumed on the current line. */
3679+ char buf[MB_LEN_MAX + BUFSIZ]; /* Spool for bytes read from STREAM. */
3680+ char *bufpos; /* Next unread position within BUF. */
3681+ size_t buflen; /* Number of unread bytes at BUFPOS. */
3682+ wint_t wc; /* The wide character most recently fetched. */
3683+ size_t mblength; /* Byte length of the multibyte sequence at BUFPOS
3684+ that corresponds to WC. */
3685+ mbstate_t state; /* Conversion state for STREAM. */
3686+ int convfail; /* Nonzero when conversion failed, otherwise 0. */
3687+
3688+ idx = 0;
3689+ buflen = 0;
3690+ bufpos = buf;
3691+ memset (&state, '\0', sizeof(mbstate_t));
3692+
3693+ while (1)
3694+ {
3695+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3696+
3697+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
3698+
3699+ if (wc == WEOF)
3700+ {
3701+ if (idx > 0) /* Input ended in the middle of a line: terminate it. */
3702+ putchar ('\n');
3703+ break;
3704+ }
3705+ else if (wc == L'\n')
3706+ {
3707+ putchar ('\n');
3708+ idx = 0; /* Start counting afresh for the next line. */
3709+ }
3710+ else
3711+ {
3712+ idx += (operating_mode == byte_mode) ? mblength : 1;
3713+ if (print_kth (idx, NULL)) /* In byte mode IDX is the offset of the character's last byte. */
3714+ fwrite (bufpos, mblength, sizeof(char), stdout);
3715+ }
3716+
3717+ buflen -= mblength; /* Consume the character just examined. */
3718+ bufpos += mblength;
3719+ }
3720+}
3721+#endif
3722+
3723 /* Read from stream STREAM, printing to standard output any selected fields. */
3724
3725 static void
3726@@ -692,13 +831,192 @@
3727 }
3728 }
3729
3730+#if HAVE_MBRTOWC
3731+static void
3732+cut_fields_mb (FILE *stream)
3733+{ /* Field mode for multibyte locales; fields are separated by the wide character WCDELIM. */
3734+ int c;
3735+ unsigned int field_idx; /* Index of the field being read; the first field is 1. */
3736+ int found_any_selected_field; /* Nonzero once a selected field was met on this line. */
3737+ int buffer_first_field;
3738+ int empty_input; /* Nonzero when STREAM yielded no data at all. */
3739+ char buf[MB_LEN_MAX + BUFSIZ]; /* Spool for bytes read from STREAM. */
3740+ char *bufpos; /* Next unread position within BUF. */
3741+ size_t buflen; /* Number of unread bytes at BUFPOS. */
3742+ wint_t wc = 0; /* The wide character most recently fetched. */
3743+ size_t mblength; /* Byte length of the multibyte sequence at BUFPOS
3744+ that corresponds to WC. */
3745+ mbstate_t state; /* Conversion state for STREAM. */
3746+ int convfail = 0; /* Nonzero when conversion failed. Starts at 0: the empty-input path tests it before any fetch. */
3747+
3748+ found_any_selected_field = 0;
3749+ field_idx = 1;
3750+ bufpos = buf;
3751+ buflen = 0;
3752+ memset (&state, '\0', sizeof(mbstate_t));
3753+
3754+ c = getc (stream);
3755+ empty_input = (c == EOF);
3756+ if (c != EOF)
3757+ ungetc (c, stream);
3758+ else
3759+ wc = WEOF;
3760+
3761+ /* To support the semantics of the -s flag, we may have to buffer
3762+ all of the first field to determine whether it is `delimited.'
3763+ But that is unnecessary if all non-delimited lines must be printed
3764+ and the first field has been selected, or if non-delimited lines
3765+ must be suppressed and the first field has *not* been selected.
3766+ That is because a non-delimited line has exactly one field. */
3767+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
3768+
3769+ while (1)
3770+ {
3771+ if (field_idx == 1 && buffer_first_field)
3772+ {
3773+ int len = 0;
3774+
3775+ while (1)
3776+ {
3777+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3778+
3779+ GET_NEXT_WC_FROM_BUFFER
3780+ (wc, bufpos, buflen, mblength, state, convfail);
3781+
3782+ if (wc == WEOF)
3783+ break;
3784+
3785+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
3786+ memcpy (field_1_buffer + len, bufpos, mblength);
3787+ len += mblength; /* The terminator (newline or delimiter) is buffered too. */
3788+ buflen -= mblength;
3789+ bufpos += mblength;
3790+
3791+ if (!convfail && (wc == L'\n' || wc == wcdelim))
3792+ break;
3793+ }
3794+
3795+ if (wc == WEOF)
3796+ break;
3797+
3798+ /* If the first field extends to the end of line (it is not
3799+ delimited) and we are printing all non-delimited lines,
3800+ print this one. */
3801+ if (convfail || (!convfail && wc != wcdelim))
3802+ {
3803+ if (suppress_non_delimited)
3804+ {
3805+ /* Empty. */
3806+ }
3807+ else
3808+ {
3809+ fwrite (field_1_buffer, sizeof (char), len, stdout);
3810+ /* Make sure the output line is newline terminated. */
3811+ if (convfail || (!convfail && wc != L'\n'))
3812+ putchar ('\n');
3813+ }
3814+ continue;
3815+ }
3816+
3817+ if (print_kth (1, NULL))
3818+ {
3819+ /* Print the field without its trailing delimiter, which is the last MBLENGTH bytes (not always 1). */
3820+ fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
3821+ found_any_selected_field = 1;
3822+ }
3823+ ++field_idx;
3824+ }
3825+
3826+ if (wc != WEOF)
3827+ {
3828+ if (print_kth (field_idx, NULL))
3829+ {
3830+ if (found_any_selected_field) /* Separate this field from the one printed before it. */
3831+ {
3832+ fwrite (output_delimiter_string, sizeof (char),
3833+ output_delimiter_length, stdout);
3834+ }
3835+ found_any_selected_field = 1;
3836+ }
3837+
3838+ while (1)
3839+ {
3840+ REFILL_BUFFER (buf, bufpos, buflen, stream);
3841+
3842+ GET_NEXT_WC_FROM_BUFFER
3843+ (wc, bufpos, buflen, mblength, state, convfail);
3844+
3845+ if (wc == WEOF)
3846+ break;
3847+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
3848+ {
3849+ buflen -= mblength; /* Consume the terminator without printing it. */
3850+ bufpos += mblength;
3851+ break;
3852+ }
3853+
3854+ if (print_kth (field_idx, NULL))
3855+ fwrite (bufpos, mblength, sizeof(char), stdout);
3856+
3857+ buflen -= mblength;
3858+ bufpos += mblength;
3859+ }
3860+ }
3861+
3862+ if ((!convfail || wc == L'\n') && buflen < 1)
3863+ wc = WEOF;
3864+
3865+ if (!convfail && wc == wcdelim)
3866+ ++field_idx;
3867+ else if (wc == WEOF || (!convfail && wc == L'\n'))
3868+ {
3869+ if (found_any_selected_field
3870+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
3871+ putchar ('\n');
3872+ if (wc == WEOF)
3873+ break;
3874+ field_idx = 1; /* Reset the per-line state for the next line. */
3875+ found_any_selected_field = 0;
3876+ }
3877+ }
3878+}
3879+#endif
3880+
3881 static void
3882 cut_stream (FILE *stream)
3883 {
3884- if (operating_mode == byte_mode)
3885- cut_bytes (stream);
3886+#if HAVE_MBRTOWC
3887+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) /* Multibyte locale and no forced single-byte fallback. */
3888+ {
3889+ switch (operating_mode)
3890+ {
3891+ case byte_mode:
3892+ if (byte_mode_character_aware) /* Set by -n: do not split multibyte characters. */
3893+ cut_characters_or_cut_bytes_no_split (stream);
3894+ else
3895+ cut_bytes (stream);
3896+ break;
3897+
3898+ case character_mode:
3899+ cut_characters_or_cut_bytes_no_split (stream);
3900+ break;
3901+
3902+ case field_mode:
3903+ cut_fields_mb (stream);
3904+ break;
3905+
3906+ default:
3907+ abort ();
3908+ }
3909+ }
3910 else
3911- cut_fields (stream);
3912+#endif
3913+ { /* Single-byte path: both byte and character mode go through cut_bytes. */
3914+ if (operating_mode == field_mode)
3915+ cut_fields (stream);
3916+ else
3917+ cut_bytes (stream);
3918+ }
3919 }
3920
3921 /* Process file FILE to standard output.
3922@@ -748,6 +1066,8 @@
3923 bool ok;
3924 bool delim_specified = false;
3925 char *spec_list_string IF_LINT(= NULL);
3926+ char mbdelim[MB_LEN_MAX + 1];
3927+ size_t delimlen = 0;
3928
3929 initialize_main (&argc, &argv);
3930 program_name = argv[0];
3931@@ -770,7 +1090,6 @@
3932 switch (optc)
3933 {
3934 case 'b':
3935- case 'c':
3936 /* Build the byte list. */
3937 if (operating_mode != undefined_mode)
3938 FATAL_ERROR (_("only one type of list may be specified"));
3939@@ -778,6 +1097,14 @@
3940 spec_list_string = optarg;
3941 break;
3942
3943+ case 'c':
3944+ /* Build the character list. */
3945+ if (operating_mode != undefined_mode)
3946+ FATAL_ERROR (_("only one type of list may be specified"));
3947+ operating_mode = character_mode;
3948+ spec_list_string = optarg;
3949+ break;
3950+
3951 case 'f':
3952 /* Build the field list. */
3953 if (operating_mode != undefined_mode)
3954@@ -789,10 +1116,35 @@
3955 case 'd':
3956 /* New delimiter. */
3957 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
3958- if (optarg[0] != '\0' && optarg[1] != '\0')
3959- FATAL_ERROR (_("the delimiter must be a single character"));
3960- delim = optarg[0];
3961- delim_specified = true;
3962+#if HAVE_MBRTOWC
3963+ {
3964+ if(MB_CUR_MAX > 1)
3965+ {
3966+ mbstate_t state;
3967+
3968+ memset (&state, '\0', sizeof(mbstate_t));
3969+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
3970+
3971+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
3972+ ++force_singlebyte_mode;
3973+ else
3974+ {
3975+ delimlen = (delimlen < 1) ? 1 : delimlen;
3976+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
3977+ FATAL_ERROR (_("the delimiter must be a single character"));
3978+ memcpy (mbdelim, optarg, delimlen);
3979+ }
3980+ }
3981+
3982+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
3983+#endif
3984+ {
3985+ if (optarg[0] != '\0' && optarg[1] != '\0')
3986+ FATAL_ERROR (_("the delimiter must be a single character"));
3987+ delim = (unsigned char) optarg[0];
3988+ }
3989+ delim_specified = true;
3990+ }
3991 break;
3992
3993 case OUTPUT_DELIMITER_OPTION:
3994@@ -805,6 +1157,7 @@
3995 break;
3996
3997 case 'n':
3998+ byte_mode_character_aware = 1;
3999 break;
4000
4001 case 's':
4002@@ -827,7 +1180,7 @@
4003 if (operating_mode == undefined_mode)
4004 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
4005
4006- if (delim != '\0' && operating_mode != field_mode)
4007+ if (delim_specified && operating_mode != field_mode)
4008 FATAL_ERROR (_("an input delimiter may be specified only\
4009 when operating on fields"));
4010
4011@@ -854,15 +1207,34 @@
4012 }
4013
4014 if (!delim_specified)
4015- delim = '\t';
4016+ {
4017+ delim = '\t';
4018+#ifdef HAVE_MBRTOWC
4019+ wcdelim = L'\t';
4020+ mbdelim[0] = '\t';
4021+ mbdelim[1] = '\0';
4022+ delimlen = 1;
4023+#endif
4024+ }
4025
4026 if (output_delimiter_string == NULL)
4027 {
4028- static char dummy[2];
4029- dummy[0] = delim;
4030- dummy[1] = '\0';
4031- output_delimiter_string = dummy;
4032- output_delimiter_length = 1;
4033+#ifdef HAVE_MBRTOWC
4034+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
4035+ {
4036+ output_delimiter_string = xstrdup(mbdelim);
4037+ output_delimiter_length = delimlen;
4038+ }
4039+
4040+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
4041+#endif
4042+ {
4043+ static char dummy[2];
4044+ dummy[0] = delim;
4045+ dummy[1] = '\0';
4046+ output_delimiter_string = dummy;
4047+ output_delimiter_length = 1;
4048+ }
4049 }
4050
4051 if (optind == argc)