From 8c7bd787defa071c96289b7da9397f673fddb874 Mon Sep 17 00:00:00 2001
From: Ken Sharp <ken.sharp@artifex.com>
Date: Wed, 20 May 2020 16:02:07 +0100
Subject: [PATCH] txtwrite - address memory problems

Bug #702229 " txtwrite: use after free in 9.51 on some files (regression from 9.50)"
Also bug #702346 and the earlier report #701877.

The problems occur because it's possible for a single character code in
a PDF file to map to more than a single Unicode code point. In the case
of the file for 701877 the character code maps to 'f' and 'i' (it is an
fi ligature).

The code should deal with this, but we need to ensure we are using the
correct index. In addition, if we do get more Unicode code points than
we expected, we need to set the widths of the 'extra' code points to
zero (we only want to consider the width of the original character).

This does mean increasing the size of the Widths array to cater for
the possibility of more entries on output than there were on input.

While working on it I noticed that the Unicode remapping on little-
endian machines was reversing the order of the Unicode values, when
there was more than a single code point returned, so I fixed that at
the same time.

Upstream-Status: Backport [https://git.ghostscript.com/?p=ghostpdl.git;h=8c7bd787defa071c96289b7da9397f673fddb874]
CVE: CVE-2020-36773
Signed-off-by: Vijay Anusuri <vanusuri@mvista.com>
---
 devices/vector/gdevtxtw.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/devices/vector/gdevtxtw.c b/devices/vector/gdevtxtw.c
index 87f9355..bddce5a 100644
--- a/devices/vector/gdevtxtw.c
+++ b/devices/vector/gdevtxtw.c
@@ -1812,11 +1812,11 @@ static int get_unicode(textw_text_enum_t *penum, gs_font *font, gs_glyph glyph,
 #else
         b = (char *)Buffer;
         u = (char *)unicode;
-        while (l >= 0) {
-            *b++ = *(u + l);
-            l--;
-        }
 
+	for (l=0;l<length;l+=2, u+=2){
+	    *b++ = *(u+1);
+	    *b++ = *u;
+	}
 #endif
         gs_free_object(penum->dev->memory, unicode, "free temporary unicode buffer");
         return length / sizeof(short);
@@ -1963,7 +1963,7 @@ txtwrite_process_plain_text(gs_text_enum_t *pte)
                           &penum->text_state->matrix, &wanted);
         pte->returned.total_width.x += wanted.x;
         pte->returned.total_width.y += wanted.y;
-        penum->Widths[pte->index - 1] = wanted.x;
+        penum->Widths[penum->TextBufferIndex] = wanted.x;
 
         if (pte->text.operation & TEXT_ADD_TO_ALL_WIDTHS) {
             gs_point tpt;
@@ -1984,8 +1984,14 @@ txtwrite_process_plain_text(gs_text_enum_t *pte)
         pte->returned.total_width.x += dpt.x;
         pte->returned.total_width.y += dpt.y;
 
-        penum->TextBufferIndex += get_unicode(penum, (gs_font *)pte->orig_font, glyph, ch, &penum->TextBuffer[penum->TextBufferIndex]);
-        penum->Widths[pte->index - 1] += dpt.x;
+	penum->Widths[penum->TextBufferIndex] += dpt.x;
+	code = get_unicode(penum, (gs_font *)pte->orig_font, glyph, ch, &penum->TextBuffer[penum->TextBufferIndex]);
+	/* If a single text code returned multiple Unicode values, then we need to set the
+	 * 'extra' code points' widths to 0.
+	 */
+	if (code > 1)
+	    memset(&penum->Widths[penum->TextBufferIndex + 1], 0x00, (code - 1) * sizeof(float));
+	penum->TextBufferIndex += code;
     }
     return 0;
 }
@@ -2123,7 +2129,7 @@ txt_add_fragment(gx_device_txtwrite_t *tdev, textw_text_enum_t *penum)
     if (!penum->text_state->Widths)
         return gs_note_error(gs_error_VMerror);
     memset(penum->text_state->Widths, 0x00, penum->TextBufferIndex * sizeof(float));
-    memcpy(penum->text_state->Widths, penum->Widths, penum->text.size * sizeof(float));
+    memcpy(penum->text_state->Widths, penum->Widths, penum->TextBufferIndex * sizeof(float));
 
     unsorted_entry->Unicode_Text = (unsigned short *)gs_malloc(tdev->memory->stable_memory,
         penum->TextBufferIndex, sizeof(unsigned short), "txtwrite alloc sorted text buffer");
@@ -2136,7 +2142,7 @@ txt_add_fragment(gx_device_txtwrite_t *tdev, textw_text_enum_t *penum)
     if (!unsorted_entry->Widths)
         return gs_note_error(gs_error_VMerror);
     memset(unsorted_entry->Widths, 0x00, penum->TextBufferIndex * sizeof(float));
-    memcpy(unsorted_entry->Widths, penum->Widths, penum->text.size * sizeof(float));
+    memcpy(unsorted_entry->Widths, penum->Widths, penum->TextBufferIndex * sizeof(float));
 
     unsorted_entry->FontName = (char *)gs_malloc(tdev->memory->stable_memory,
         (strlen(penum->text_state->FontName) + 1), sizeof(unsigned char), "txtwrite alloc sorted text buffer");
@@ -2192,7 +2198,7 @@ textw_text_process(gs_text_enum_t *pte)
         if (!penum->TextBuffer)
             return gs_note_error(gs_error_VMerror);
         penum->Widths = (float *)gs_malloc(tdev->memory->stable_memory,
-            pte->text.size, sizeof(float), "txtwrite temporary widths array");
+            pte->text.size * 4, sizeof(float), "txtwrite temporary widths array");
         if (!penum->Widths)
             return gs_note_error(gs_error_VMerror);
     }
-- 
2.25.1