Submitted By: Ken Moffat Date: 2019-05-31 Initial Package Version: 2019 Upstream Status: Applied Origin: Upstream Description: Fixes from branch2019 up to svn r51236, and fixes from trunk for harfbuzz-2.5.0 changes (svn r51269, 51271). diff -Naur a/texk/dvipdfm-x/ChangeLog b/texk/dvipdfm-x/ChangeLog --- a/texk/dvipdfm-x/ChangeLog 2019-04-07 02:42:55.000000000 +0100 +++ b/texk/dvipdfm-x/ChangeLog 2019-05-31 22:00:04.006964029 +0100 @@ -1,3 +1,47 @@ +2019-05-08 Akira Kakuto + + * dpxfile.c: Avoid buffer overflow. + +2019-05-08 Shunsaku Hirata + + * spc_pdfm.c: Transformation also applies to annotations + created by pdf:ann. + +2019-05-07 Shunsaku Hirata + + * tt_cmap.c: Fix a bug that CFF charset data were not read. + * unicode.c: Fix a bug that end-of-buffer calculation was + wrong. + +2019-05-05 Shunsaku Hirata + + * pdfdoc.c: g option affects only annotations created by + "pdf:bann/eann" and html "a link" specials. + Report from Joseph Wright: + https://tug.org/pipermail/tex-live/2019-May/043612.html + +2019-05-03 Shunsaku Hirata + + * specials.c: transformation applied to current point was not + done properly. + +2019-05-03 Akira Kakuto + + * dpxfile.c, mfileio.h, pdfximage.c: Support non-ascii file + names in default code page of OS irrespective of values of a + variable 'command_line_encoding' (Windows only). + +2019-05-03 Shunsaku Hirata + + * pdfobj.c, pdfobj.h: Add support for ASCIIHex and ASCII85 + decode filter. Support for decoding stream data with multiple + filters applied. Remove unused function. + * tt_cmap.c, tt_gsub.[ch]: Revise ToUnicode CMap creation for + OpenType. Use GSUB for mapping unencoded glyphs to Unicode + (XeTeX support), lowering priority of CJK compatibility + ideographs. + * configure.ac: Version 20190503. + 2019-04-07 Karl Berry * TeX Live 2019. 
diff -Naur a/texk/dvipdfm-x/configure b/texk/dvipdfm-x/configure --- a/texk/dvipdfm-x/configure 2019-02-24 22:07:11.000000000 +0000 +++ b/texk/dvipdfm-x/configure 2019-05-31 22:00:04.007964030 +0100 @@ -1,8 +1,8 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for dvipdfm-x (TeX Live) 20190225. +# Generated by GNU Autoconf 2.69 for dvipdfm-x (TeX Live) 20190503. # -# Report bugs to <tex-k@tug.org>. +# Report bugs to <dvipdfmx@tug.org>. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -274,7 +274,7 @@ $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else - $as_echo "$0: Please tell bug-autoconf@gnu.org and tex-k@tug.org + $as_echo "$0: Please tell bug-autoconf@gnu.org and dvipdfmx@tug.org $0: about your system, including any error possibly output $0: before this message. Then install a modern shell, or $0: manually run the script under such a shell if you do @@ -590,9 +590,9 @@ # Identity of this package. PACKAGE_NAME='dvipdfm-x (TeX Live)' PACKAGE_TARNAME='dvipdfm-x--tex-live-' -PACKAGE_VERSION='20190225' -PACKAGE_STRING='dvipdfm-x (TeX Live) 20190225' -PACKAGE_BUGREPORT='tex-k@tug.org' +PACKAGE_VERSION='20190503' +PACKAGE_STRING='dvipdfm-x (TeX Live) 20190503' +PACKAGE_BUGREPORT='dvipdfmx@tug.org' PACKAGE_URL='' ac_unique_file="agl.c" @@ -1350,7 +1350,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures dvipdfm-x (TeX Live) 20190225 to adapt to many kinds of systems. +\`configure' configures dvipdfm-x (TeX Live) 20190503 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1421,7 +1421,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of dvipdfm-x (TeX Live) 20190225:";; + short | recursive ) echo "Configuration of dvipdfm-x (TeX Live) 20190503:";; esac cat <<\_ACEOF @@ -1488,7 +1488,7 @@ Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. -Report bugs to <tex-k@tug.org>. +Report bugs to <dvipdfmx@tug.org>. _ACEOF ac_status=$? fi @@ -1551,7 +1551,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -dvipdfm-x (TeX Live) configure 20190225 +dvipdfm-x (TeX Live) configure 20190503 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1896,9 +1896,9 @@ $as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} -( $as_echo "## ---------------------------- ## -## Report this to tex-k@tug.org ## -## ---------------------------- ##" +( $as_echo "## ------------------------------- ## +## Report this to dvipdfmx@tug.org ## +## ------------------------------- ##" ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac @@ -2390,7 +2390,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by dvipdfm-x (TeX Live) $as_me 20190225, which was +It was created by dvipdfm-x (TeX Live) $as_me 20190503, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -8077,7 +8077,7 @@ # Define the identity of the package. PACKAGE='dvipdfm-x--tex-live-' - VERSION='20190225' + VERSION='20190503' cat >>confdefs.h <<_ACEOF @@ -14746,7 +14746,7 @@ Report bugs to ." 
lt_cl_version="\ -dvipdfm-x (TeX Live) config.lt 20190225 +dvipdfm-x (TeX Live) config.lt 20190503 configured by $0, generated by GNU Autoconf 2.69. Copyright (C) 2011 Free Software Foundation, Inc. @@ -16636,7 +16636,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by dvipdfm-x (TeX Live) $as_me 20190225, which was +This file was extended by dvipdfm-x (TeX Live) $as_me 20190503, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -16700,13 +16700,13 @@ Configuration commands: $config_commands -Report bugs to <tex-k@tug.org>." +Report bugs to <dvipdfmx@tug.org>." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -dvipdfm-x (TeX Live) config.status 20190225 +dvipdfm-x (TeX Live) config.status 20190503 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Naur a/texk/dvipdfm-x/configure.ac b/texk/dvipdfm-x/configure.ac --- a/texk/dvipdfm-x/configure.ac 2019-02-24 22:07:11.000000000 +0000 +++ b/texk/dvipdfm-x/configure.ac 2019-05-31 22:00:04.007964030 +0100 @@ -7,7 +7,7 @@ dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. dnl -AC_INIT([dvipdfm-x (TeX Live)], [20190225], [tex-k@tug.org]) +AC_INIT([dvipdfm-x (TeX Live)], [20190503], [dvipdfmx@tug.org]) AC_PREREQ([2.65]) AC_CONFIG_SRCDIR([agl.c]) AC_CONFIG_AUX_DIR([../../build-aux]) diff -Naur a/texk/dvipdfm-x/dpxfile.c b/texk/dvipdfm-x/dpxfile.c --- a/texk/dvipdfm-x/dpxfile.c 2018-10-07 22:27:40.000000000 +0100 +++ b/texk/dvipdfm-x/dpxfile.c 2019-05-31 22:00:04.007964030 +0100 @@ -1,5 +1,5 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. 
- Copyright (C) 2007-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2007-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. Wicks @@ -159,11 +159,17 @@ static char _tmpbuf[PATH_MAX+1]; #endif /* MIKTEX */ +#if defined(WIN32) +extern int utf8name_failed; +#endif /* WIN32 */ + +#define CMDBUFSIZ 1024 static int exec_spawn (char *cmd) { char **cmdv, **qv; char *p, *pp; - char buf[1024]; + char buf[CMDBUFSIZ]; + int charcnt; int i, ret = -1; #ifdef WIN32 wchar_t **cmdvw, **qvw; @@ -182,11 +188,12 @@ i++; p++; } - cmdv = xcalloc (i + 2, sizeof (char *)); + cmdv = xcalloc (i + 4, sizeof (char *)); p = cmd; qv = cmdv; while (*p) { pp = buf; + charcnt = 0; if (*p == '"') { p++; while (*p != '"') { @@ -194,6 +201,10 @@ goto done; } *pp++ = *p++; + charcnt++; + if (charcnt > CMDBUFSIZ - 1) { + ERROR("Too long a command line."); + } } p++; } else if (*p == '\'') { @@ -203,6 +214,10 @@ goto done; } *pp++ = *p++; + charcnt++; + if (charcnt > CMDBUFSIZ - 1) { + ERROR("Too long a command line."); + } } p++; } else { @@ -214,10 +229,18 @@ goto done; } *pp++ = *p++; + charcnt++; + if (charcnt > CMDBUFSIZ - 1) { + ERROR("Too long a command line."); + } } p++; } else { *pp++ = *p++; + charcnt++; + if (charcnt > CMDBUFSIZ - 1) { + ERROR("Too long a command line."); + } } } } @@ -235,20 +258,39 @@ p++; qv++; } + *qv = NULL; + #ifdef WIN32 #if defined(MIKTEX) ret = _spawnvp(_P_WAIT, *cmdv, (const char* const*)cmdv); #else - cmdvw = xcalloc (i + 2, sizeof (wchar_t *)); - qv = cmdv; - qvw = cmdvw; - while (*qv) { - *qvw = get_wstring_from_fsyscp(*qv, *qvw=NULL); - qv++; - qvw++; + cmdvw = xcalloc (i + 4, sizeof (wchar_t *)); + if (utf8name_failed == 0) { + qv = cmdv; + qvw = cmdvw; + while (*qv) { + *qvw = get_wstring_from_fsyscp(*qv, *qvw=NULL); + qv++; + qvw++; + } + *qvw = NULL; + ret = _wspawnvp (_P_WAIT, *cmdvw, (const wchar_t* const*) cmdvw); + } else { + int tmpcp; + tmpcp = file_system_codepage; + 
file_system_codepage = win32_codepage; + qv = cmdv; + qvw = cmdvw; + while (*qv) { + *qvw = get_wstring_from_fsyscp(*qv, *qvw=NULL); + qv++; + qvw++; + } + *qvw = NULL; + file_system_codepage = tmpcp; + utf8name_failed = 0; + ret = _wspawnvp (_P_WAIT, *cmdvw, (const wchar_t* const*) cmdvw); } - *qvw = NULL; - ret = _wspawnvp (_P_WAIT, *cmdvw, (const wchar_t* const*) cmdvw); if (cmdvw) { qvw = cmdvw; while (*qvw) { @@ -1242,3 +1284,21 @@ return r; } + +#if defined(WIN32) +FILE *generic_fsyscp_fopen (const char *filename, const char *mode) +{ + FILE *f; + + f = fsyscp_fopen (filename, mode); + + if (f == NULL && file_system_codepage != win32_codepage) { + int tmpcp = file_system_codepage; + file_system_codepage = win32_codepage; + f = fsyscp_fopen (filename, mode); + file_system_codepage = tmpcp; + } + + return f; +} +#endif /* WIN32 */ diff -Naur a/texk/dvipdfm-x/dvipdfmx.c b/texk/dvipdfm-x/dvipdfmx.c --- a/texk/dvipdfm-x/dvipdfmx.c 2019-02-11 18:22:31.000000000 +0000 +++ b/texk/dvipdfm-x/dvipdfmx.c 2019-05-31 22:00:04.007964030 +0100 @@ -232,7 +232,7 @@ printf ("Papersize is specified by paper format (e.g., \"a4\")\n"); printf ("\tor by w,h (e.g., \"20cm,30cm\").\n"); printf ("\n"); - printf ("Email bug reports to tex-k@tug.org.\n"); + printf ("Email bug reports to dvipdfmx@tug.org.\n"); } static void diff -Naur a/texk/dvipdfm-x/mfileio.h b/texk/dvipdfm-x/mfileio.h --- a/texk/dvipdfm-x/mfileio.h 2016-01-11 03:57:04.000000000 +0000 +++ b/texk/dvipdfm-x/mfileio.h 2019-05-31 22:00:04.008964031 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2002-2016 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. 
Wicks @@ -36,7 +36,8 @@ mfclose((file),__FUNCTION__,__LINE__) #else #if defined(WIN32) -#define MFOPEN(name,mode) fsyscp_fopen((name),(mode)) +extern FILE *generic_fsyscp_fopen(const char *fname, const char *mode); +#define MFOPEN(name,mode) generic_fsyscp_fopen((name),(mode)) #else #define MFOPEN(name,mode) fopen((name),(mode)) #endif diff -Naur a/texk/dvipdfm-x/pdfdoc.c b/texk/dvipdfm-x/pdfdoc.c --- a/texk/dvipdfm-x/pdfdoc.c 2018-12-21 03:39:51.000000000 +0000 +++ b/texk/dvipdfm-x/pdfdoc.c 2019-05-31 22:00:04.008964031 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2008-2018 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata, + Copyright (C) 2008-2019 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. Wicks @@ -1780,7 +1780,6 @@ pdf_doc *p = &pdoc; pdf_page *page; pdf_obj *rect_array; - double annot_grow = p->opt.annot_grow; double xpos, ypos; pdf_rect annbox; @@ -1812,10 +1811,10 @@ } rect_array = pdf_new_array(); - pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.llx - annot_grow, 0.001))); - pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.lly - annot_grow, 0.001))); - pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.urx + annot_grow, 0.001))); - pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.ury + annot_grow, 0.001))); + pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.llx, 0.001))); + pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.lly, 0.001))); + pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.urx, 0.001))); + pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.ury, 0.001))); pdf_add_dict (annot_dict, pdf_new_name("Rect"), rect_array); pdf_add_array(page->annots, pdf_ref_obj(annot_dict)); @@ -2802,13 +2801,22 @@ void pdf_doc_break_annot (void) { + pdf_doc *p = &pdoc; + double g = p->opt.annot_grow; + if (breaking_state.dirty) { pdf_obj *annot_dict; + pdf_rect rect; /* Copy dict */ 
annot_dict = pdf_new_dict(); pdf_merge_dict(annot_dict, breaking_state.annot_dict); - pdf_doc_add_annot(pdf_doc_current_page_number(), &(breaking_state.rect), + rect = breaking_state.rect; + rect.llx -= g; + rect.lly -= g; + rect.urx += g; + rect.ury += g; + pdf_doc_add_annot(pdf_doc_current_page_number(), &rect, annot_dict, !breaking_state.broken); pdf_release_obj(annot_dict); diff -Naur a/texk/dvipdfm-x/pdfobj.c b/texk/dvipdfm-x/pdfobj.c --- a/texk/dvipdfm-x/pdfobj.c 2018-12-26 23:07:57.000000000 +0000 +++ b/texk/dvipdfm-x/pdfobj.c 2019-05-31 22:00:04.008964031 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2007-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2007-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. Wicks @@ -2223,48 +2223,9 @@ #if HAVE_ZLIB #define WBUF_SIZE 4096 -int -pdf_add_stream_flate (pdf_obj *dst, const void *data, int len) -{ - z_stream z; - Bytef wbuf[WBUF_SIZE]; - - z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL; - - z.next_in = (z_const Bytef *) data; z.avail_in = len; - z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE; - - if (inflateInit(&z) != Z_OK) { - WARN("inflateInit() failed."); - return -1; - } - - for (;;) { - int status; - status = inflate(&z, Z_NO_FLUSH); - if (status == Z_STREAM_END) - break; - else if (status != Z_OK) { - WARN("inflate() failed. Broken PDF file?"); - inflateEnd(&z); - return -1; - } - - if (z.avail_out == 0) { - pdf_add_stream(dst, wbuf, WBUF_SIZE); - z.next_out = wbuf; - z.avail_out = WBUF_SIZE; - } - } - - if (WBUF_SIZE - z.avail_out > 0) - pdf_add_stream(dst, wbuf, WBUF_SIZE - z.avail_out); - - return (inflateEnd(&z) == Z_OK ? 
0 : -1); -} static int -get_decode_parms (struct decode_parms *parms, pdf_obj *dict) +filter_get_DecodeParms_FlateDecode (struct decode_parms *parms, pdf_obj *dict) { pdf_obj *tmp; @@ -2278,18 +2239,25 @@ parms->columns = 1; tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Predictor")); - if (tmp) + if (tmp) { parms->predictor = pdf_number_value(tmp); + pdf_release_obj(tmp); + } tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Colors")); - if (tmp) + if (tmp) { parms->colors = pdf_number_value(tmp); + pdf_release_obj(tmp); + } tmp = pdf_deref_obj(pdf_lookup_dict(dict, "BitsPerComponent")); - if (tmp) + if (tmp) { parms->bits_per_component = pdf_number_value(tmp); + pdf_release_obj(tmp); + } tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Columns")); - if (tmp) + if (tmp) { parms->columns = pdf_number_value(tmp); - + pdf_release_obj(tmp); + } if (parms->bits_per_component != 1 && parms->bits_per_component != 2 && parms->bits_per_component != 4 && @@ -2354,17 +2322,19 @@ * Especially, calling pdf_add_stream() for each 4 bytes append is highly * inefficient. 
*/ -static int -filter_decoded (pdf_obj *dst, const void *src, int srclen, - struct decode_parms *parms) +static pdf_obj * +filter_stream_decode_Predictor (const void *src, size_t srclen, struct decode_parms *parms) { + pdf_obj *dst; const unsigned char *p = (const unsigned char *) src; const unsigned char *endptr = p + srclen; - unsigned char *prev, *buf; - int bits_per_pixel = parms->colors * parms->bits_per_component; - int bytes_per_pixel = (bits_per_pixel + 7) / 8; - int length = (parms->columns * bits_per_pixel + 7) / 8; - int i, error = 0; + unsigned char *prev, *buf; + int bits_per_pixel = parms->colors * parms->bits_per_component; + int bytes_per_pixel = (bits_per_pixel + 7) / 8; + int length = (parms->columns * bits_per_pixel + 7) / 8; + int i, error = 0; + + dst = pdf_new_stream(0); prev = NEW(length, unsigned char); buf = NEW(length, unsigned char); @@ -2491,16 +2461,21 @@ RELEASE(prev); RELEASE(buf); - return error; + if (error) { + pdf_release_obj(dst); + dst = NULL; + } + + return dst; } -static int -pdf_add_stream_flate_filtered (pdf_obj *dst, const void *data, int len, struct decode_parms *parms) +static pdf_obj * +filter_stream_decode_FlateDecode (const void *data, size_t len, struct decode_parms *parms) { - pdf_obj *tmp; - z_stream z; - Bytef wbuf[WBUF_SIZE]; - int error; + pdf_obj *dst; + pdf_obj *tmp; + z_stream z; + Bytef wbuf[WBUF_SIZE]; z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL; @@ -2509,7 +2484,7 @@ if (inflateInit(&z) != Z_OK) { WARN("inflateInit() failed."); - return -1; + return NULL; } tmp = pdf_new_stream(0); @@ -2521,7 +2496,8 @@ else if (status != Z_OK) { WARN("inflate() failed. 
Broken PDF file?"); inflateEnd(&z); - return -1; + pdf_release_obj(tmp); + return NULL; } if (z.avail_out == 0) { @@ -2534,79 +2510,310 @@ if (WBUF_SIZE - z.avail_out > 0) pdf_add_stream(tmp, wbuf, WBUF_SIZE - z.avail_out); - error = filter_decoded(dst, pdf_stream_dataptr(tmp), pdf_stream_length(tmp), parms); + if (inflateEnd(&z) == Z_OK) { + if (parms) { + dst = filter_stream_decode_Predictor(pdf_stream_dataptr(tmp), pdf_stream_length(tmp), parms); + } else { + dst = pdf_link_obj(tmp); + } + } else { + dst = NULL; + } pdf_release_obj(tmp); - return ((!error && inflateEnd(&z) == Z_OK) ? 0 : -1); + return dst; } #endif -int -pdf_concat_stream (pdf_obj *dst, pdf_obj *src) +static pdf_obj * +filter_stream_decode_ASCIIHexDecode (const void *data, size_t len) +{ + pdf_obj *dst; + int eod, error; + const char *p = (const char *) data; + const char *endptr = p + len; + unsigned char *buf, ch; + size_t pos, n; + + buf = NEW((len+1)/2, unsigned char); + skip_white(&p, endptr); + ch = 0; n = 0; pos = 0; eod = 0; error = 0; + while (p < endptr && !error && !eod) { + char c1, val; + c1 = p[0]; + if (c1 >= 'A' && c1 <= 'F') { + val = c1 - 'A' + 10; + } else if (c1 >= 'a' && c1 <= 'f') { + val = c1 - 'a' + 10; + } else if (c1 >= '0' && c1 <= '9') { + val = c1 - '0'; + } else if (c1 == '>') { + val = 0; + eod = 1; + if ((pos % 2) == 0) + break; + } else { + error = -1; + break; + } + if (pos % 2) { + buf[n] = ch + val; + n++; + ch = 0; + } else { + ch = val << 4; + } + pos++; p++; + skip_white(&p, endptr); + } + if (error || !eod) { + WARN("Invalid ASCIIHex data seen: %s", error ? "Invalid character" : "No EOD marker"); + dst = NULL; + } else { + dst = pdf_new_stream(0); + pdf_add_stream(dst, buf, n); + } + RELEASE(buf); + + return dst; +} + +/* Percent sign is not start of comment here. + * We need this for reading Ascii85 encoded data. 
+ */ +#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\f' || \ + (c) == '\r' || (c) == '\n' || (c) == '\0') +static void +skip_white_a85 (const char **p, const char *endptr) +{ + while (*p < endptr && (is_space(**p))) { + (*p)++; + } +} + +static pdf_obj * +filter_stream_decode_ASCII85Decode (const void *data, size_t len) { + pdf_obj *dst; + int eod, error; + const char *p = (const char *) data; + const char *endptr = p + len; + unsigned char *buf; + size_t n; + + buf = NEW(((len+4)/5)*4, unsigned char); + skip_white_a85(&p, endptr); + n = 0; eod = 0; error = 0; + while (p < endptr && !error && !eod) { + char q[5] = {'u', 'u', 'u', 'u', 'u'}; + int m; + char ch; + + ch = p[0]; + p++; + skip_white_a85(&p, endptr); + if (ch == 'z') { + memset(buf+n, 0, 4); + n += 4; + continue; + } else if (ch == '~') { + if (p < endptr && p[0] == '>') { + eod = 1; + p++; + } else { + error = -1; + } + break; + } + q[0] = ch; + for (m = 1; m < 5 && p < endptr; m++) { + ch = p[0]; + p++; + skip_white_a85(&p, endptr); + if (ch == '~') { + if (p < endptr && p[0] == '>') { + eod = 1; + p++; + } else { + error = -1; + } + break; + } else if (ch < '!' 
|| ch > 'u') { + error = -1; + break; + } else { + q[m] = ch; + } + } + if (!error) { + uint32_t val = 0; + int i; + if (m <= 1) { + error = -1; + break; + } + val = 85*85*85*(q[0] - '!') + 85*85*(q[1] - '!') + + 85*(q[2] - '!') + (q[3] - '!'); + /* Check overflow */ + if (val > UINT32_MAX / 85) { + error = -1; + break; + } else { + val = 85 * val; + if (val > UINT32_MAX - (q[4] - '!')) { + error = -1; + break; + } + val += (q[4] - '!'); + } + if (!error) { + for (i = 3; i >= 0; i--) { + buf[n + i] = val & 0xff; + val /= 256; + } + n += m - 1; + } + } + } + + if (error) { + WARN("Error in reading ASCII85 data."); + dst = NULL; + } else if (!eod) { + WARN("Error in reading ASCII85 data: No EOD"); + dst = NULL; + } else { + dst = pdf_new_stream(0); + pdf_add_stream(dst, buf, n); + } + RELEASE(buf); + + return dst; +} + +static pdf_obj * +filter_stream_decode (const char *filter_name, pdf_obj *src, pdf_obj *parm) +{ + pdf_obj *dec; const char *stream_data; - int stream_length; - pdf_obj *stream_dict; - pdf_obj *filter; - int error = 0; + size_t stream_length; - if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src)) - ERROR("Invalid type."); + if (!filter_name) + return pdf_link_obj(src); stream_data = pdf_stream_dataptr(src); - stream_length = pdf_stream_length (src); - stream_dict = pdf_stream_dict (src); + stream_length = pdf_stream_length(src); - filter = pdf_lookup_dict(stream_dict, "Filter"); - if (!filter) - pdf_add_stream(dst, stream_data, stream_length); + if (!strcmp(filter_name, "ASCIIHexDecode")) { + dec = filter_stream_decode_ASCIIHexDecode(stream_data, stream_length); + } else if (!strcmp(filter_name, "ASCII85Decode")) { + dec = filter_stream_decode_ASCII85Decode(stream_data, stream_length); #if HAVE_ZLIB - else { - struct decode_parms parms; - int have_parms = 0; + } else if (!strcmp(filter_name, "FlateDecode")) { + struct decode_parms decode_parm; + if (parm) + filter_get_DecodeParms_FlateDecode(&decode_parm, parm); + dec = 
filter_stream_decode_FlateDecode(stream_data, stream_length, parm ? &decode_parm : NULL); +#endif /* HAVE_ZLIB */ + } else { + WARN("DecodeFilter \"%s\" not supported.", filter_name); + dec = NULL; + } - if (pdf_lookup_dict(stream_dict, "DecodeParms")) { - pdf_obj *tmp; + return dec; +} - /* Dictionary or array */ - tmp = pdf_deref_obj(pdf_lookup_dict(stream_dict, "DecodeParms")); - if (PDF_OBJ_ARRAYTYPE(tmp)) { - if (pdf_array_length(tmp) > 1) { - WARN("Unexpected size for DecodeParms array."); - return -1; - } - tmp = pdf_deref_obj(pdf_get_array(tmp, 0)); - } - if (!PDF_OBJ_DICTTYPE(tmp)) { - WARN("PDF dict expected for DecodeParms..."); - return -1; - } - error = get_decode_parms(&parms, tmp); - if (error) - ERROR("Invalid value(s) in DecodeParms dictionary."); - have_parms = 1; - } - if (PDF_OBJ_ARRAYTYPE(filter)) { - if (pdf_array_length(filter) > 1) { - WARN("Multiple DecodeFilter not supported."); +int +pdf_concat_stream (pdf_obj *dst, pdf_obj *src) +{ + pdf_obj *filtered; + pdf_obj *stream_dict; + pdf_obj *filter, *parms; + int error = 0; + + if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src)) { + WARN("Passed invalid type in pdf_concat_stream()."); + return -1; + } + + stream_dict = pdf_stream_dict(src); + + filter = pdf_lookup_dict(stream_dict, "Filter"); + if (!filter) { + pdf_add_stream(dst, pdf_stream_dataptr(src), pdf_stream_length(src)); + return 0; + } + if (pdf_lookup_dict(stream_dict, "DecodeParms")) { + /* Dictionary or array */ + parms = pdf_deref_obj(pdf_lookup_dict(stream_dict, "DecodeParms")); + if (!parms) { + WARN("Failed to deref DeocdeParms..."); + return -1; + } else if (!PDF_OBJ_ARRAYTYPE(parms) && !PDF_OBJ_DICTTYPE(parms)) { + WARN("PDF dict or array expected for DecodeParms..."); + pdf_release_obj(parms); + return -1; + } + } else { + parms = NULL; + } + if (PDF_OBJ_ARRAYTYPE(filter)) { + int i, num; + pdf_obj *prev = NULL; + + num = pdf_array_length(filter); + if (parms) { + if (!PDF_OBJ_ARRAYTYPE(parms) || 
pdf_array_length(parms) != num) { + WARN("Invalid DecodeParam object found."); + pdf_release_obj(parms); return -1; } - filter = pdf_get_array(filter, 0); } - if (PDF_OBJ_NAMETYPE(filter)) { - char *filter_name = pdf_name_value(filter); - if (filter_name && !strcmp(filter_name, "FlateDecode")) { - if (have_parms) - error = pdf_add_stream_flate_filtered(dst, stream_data, stream_length, &parms); - else - error = pdf_add_stream_flate(dst, stream_data, stream_length); - } else { - WARN("DecodeFilter \"%s\" not supported.", filter_name); - error = -1; + if (num == 0) { + filtered = pdf_link_obj(src); + } else { + filtered = NULL; + prev = pdf_link_obj(src); + for (i = 0; i < num && prev != NULL; i++) { + pdf_obj *tmp1, *tmp2; + + tmp1 = pdf_deref_obj(pdf_get_array(filter, i)); + if (parms) { + tmp2 = pdf_deref_obj(pdf_get_array(parms, i)); + } else { + tmp2 = NULL; + } + if (PDF_OBJ_NAMETYPE(tmp1)) { + filtered = filter_stream_decode(pdf_name_value(tmp1), prev, tmp2); + } else if (PDF_OBJ_NULLTYPE(tmp1)) { + filtered = pdf_link_obj(prev); + } else { + WARN("Unexpected object found for /Filter..."); + filtered = NULL; + } + if (prev) + pdf_release_obj(prev); + if (tmp1) + pdf_release_obj(tmp1); + if (tmp2) + pdf_release_obj(tmp2); + prev = filtered; } - } else - ERROR("Broken PDF file?"); -#endif /* HAVE_ZLIB */ + } + } else if (PDF_OBJ_NAMETYPE(filter)) { + filtered = filter_stream_decode(pdf_name_value(filter), src, parms); + } else { + WARN("Invalid value for /Filter found."); + filtered = NULL; + } + if (parms) + pdf_release_obj(parms); + if (filtered) { + pdf_add_stream(dst, pdf_stream_dataptr(filtered), pdf_stream_length(filtered)); + pdf_release_obj(filtered); + error = 0; + } else { + error = -1; } return error; diff -Naur a/texk/dvipdfm-x/pdfobj.h b/texk/dvipdfm-x/pdfobj.h --- a/texk/dvipdfm-x/pdfobj.h 2018-09-15 22:18:43.000000000 +0100 +++ b/texk/dvipdfm-x/pdfobj.h 2019-05-31 22:00:04.008964031 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of 
dvipdfm by Mark A. Wicks. - Copyright (C) 2007-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2007-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. Wicks @@ -152,11 +152,6 @@ extern void pdf_add_stream (pdf_obj *stream, const void *stream_data_ptr, int stream_data_len); -#if HAVE_ZLIB -extern int pdf_add_stream_flate (pdf_obj *stream, - const void *stream_data_ptr, - int stream_data_len); -#endif extern int pdf_concat_stream (pdf_obj *dst, pdf_obj *src); extern pdf_obj *pdf_stream_dict (pdf_obj *stream); extern int pdf_stream_length (pdf_obj *stream); diff -Naur a/texk/dvipdfm-x/pdfximage.c b/texk/dvipdfm-x/pdfximage.c --- a/texk/dvipdfm-x/pdfximage.c 2018-09-14 04:34:50.000000000 +0100 +++ b/texk/dvipdfm-x/pdfximage.c 2019-05-31 22:00:04.008964031 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2007-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2007-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. 
Wicks @@ -363,6 +363,10 @@ #define dpx_fopen(n,m) (MFOPEN((n),(m))) #define dpx_fclose(f) (MFCLOSE((f))) +#if defined(WIN32) +int utf8name_failed = 0; +#endif /* WIN32 */ + int pdf_ximage_findresource (const char *ident, load_options options) { @@ -394,8 +398,23 @@ strcpy(fullname, f); } else { /* try loading image */ +#if defined(WIN32) + utf8name_failed = 0; +#endif /* WIN32 */ fullname = dpx_find_file(ident, "_pic_", ""); +#if defined(WIN32) + if (!fullname && file_system_codepage != win32_codepage) { + int tmpcp = file_system_codepage; + utf8name_failed = 1; + file_system_codepage = win32_codepage; + fullname = dpx_find_file(ident, "_pic_", ""); + file_system_codepage = tmpcp; + } +#endif /* WIN32 */ if (!fullname) { +#if defined(WIN32) + utf8name_failed = 0; +#endif /* WIN32 */ WARN("Error locating image file \"%s\"", ident); return -1; } diff -Naur a/texk/dvipdfm-x/README b/texk/dvipdfm-x/README --- a/texk/dvipdfm-x/README 2015-07-06 17:13:08.000000000 +0100 +++ b/texk/dvipdfm-x/README 2019-05-31 22:00:04.006964029 +0100 @@ -1,9 +1,17 @@ dvipdfmx and xdvipdfmx for TeX Live =================================== +This package is released under the GNU GPL, version 2, or (at your +option) any later version. + +dvipdfmx is now maintained as part of TeX Live. + +Mailing list for bug reports and all discussion: +https://lists.tug.org/dvipdfmx; anyone can join the list, but it is not +necessary to join to post. Archives are public. + xdvipdfmx ========= - xdvipdfmx is an extended version of dvipdfmx, and is now incorporated in the same sources. @@ -15,8 +23,6 @@ redistributed under the terms of the GNU General Public License, version 2 or (at your option) any later version. -There is a hope to merge xdvipdfmx into dvipdfmx. - Jonathan Kew mentions that in the past, XeTeX used a Mac-specific program xdv2pdf as the backend instead of xdvipdfmx. 
xdv2pdf supported a couple of special effects that are not yet available through @@ -26,18 +32,8 @@ if anyone is looking for some nontrivial but not-impossible job and happens across this file. -The dvipdfmx Project -==================== - -Copyright (C) 2002-2014 by Jin-Hwan Cho, Shunsaku Hirata, -Matthias Franz, and the dvipdfmx project team. This package is released -under the GNU GPL, version 2, or (at your option) any later version. - -dvipdfmx is now maintained as part of TeX Live. - Contents -------- - 1. Introduction 2. Installation diff -Naur a/texk/dvipdfm-x/spc_pdfm.c b/texk/dvipdfm-x/spc_pdfm.c --- a/texk/dvipdfm-x/spc_pdfm.c 2019-03-30 03:42:07.000000000 +0000 +++ b/texk/dvipdfm-x/spc_pdfm.c 2019-05-31 22:00:04.008964031 +0100 @@ -597,6 +597,7 @@ return dict; } +#define SPC_PDFM_SUPPORT_ANNOT_TRANS 1 static int spc_handler_pdfm_annot (struct spc_env *spe, struct spc_arg *args) { @@ -604,7 +605,6 @@ pdf_obj *annot_dict; pdf_rect rect; char *ident = NULL; - pdf_coord cp; transform_info ti; skip_white(&args->curptr, args->endptr); @@ -642,19 +642,96 @@ return -1; } - cp.x = spe->x_user; cp.y = spe->y_user; - pdf_dev_transform(&cp, NULL); - if (ti.flags & INFO_HAS_USER_BBOX) { - rect.llx = ti.bbox.llx + cp.x; - rect.lly = ti.bbox.lly + cp.y; - rect.urx = ti.bbox.urx + cp.x; - rect.ury = ti.bbox.ury + cp.y; - } else { - rect.llx = cp.x; - rect.lly = cp.y - spe->mag * ti.depth; - rect.urx = cp.x + spe->mag * ti.width; - rect.ury = cp.y + spe->mag * ti.height; +#ifdef SPC_PDFM_SUPPORT_ANNOT_TRANS + { + pdf_coord cp1, cp2, cp3, cp4; + /* QuadPoints not working? 
*/ +#ifdef USE_QUADPOINTS + pdf_obj *qpoints; +#endif + if (ti.flags & INFO_HAS_USER_BBOX) { + cp1.x = spe->x_user + ti.bbox.llx; + cp1.y = spe->y_user + ti.bbox.lly; + cp2.x = spe->x_user + ti.bbox.urx; + cp2.y = spe->y_user + ti.bbox.lly; + cp3.x = spe->x_user + ti.bbox.urx; + cp3.y = spe->y_user + ti.bbox.ury; + cp4.x = spe->x_user + ti.bbox.llx; + cp4.y = spe->y_user + ti.bbox.ury; + } else { + cp1.x = spe->x_user; + cp1.y = spe->y_user - spe->mag * ti.depth; + cp2.x = spe->x_user + spe->mag * ti.width; + cp2.y = spe->y_user - spe->mag * ti.depth; + cp3.x = spe->x_user + spe->mag * ti.width; + cp3.y = spe->y_user + spe->mag * ti.height; + cp4.x = spe->x_user; + cp4.y = spe->y_user + spe->mag * ti.height; + } + pdf_dev_transform(&cp1, NULL); + pdf_dev_transform(&cp2, NULL); + pdf_dev_transform(&cp3, NULL); + pdf_dev_transform(&cp4, NULL); + rect.llx = cp1.x; + if (cp2.x < rect.llx) + rect.llx = cp2.x; + if (cp3.x < rect.llx) + rect.llx = cp3.x; + if (cp4.x < rect.llx) + rect.llx = cp4.x; + rect.urx = cp1.x; + if (cp2.x > rect.urx) + rect.urx = cp2.x; + if (cp3.x > rect.urx) + rect.urx = cp3.x; + if (cp4.x > rect.urx) + rect.urx = cp4.x; + rect.lly = cp1.y; + if (cp2.y < rect.lly) + rect.lly = cp2.y; + if (cp3.y < rect.lly) + rect.lly = cp3.y; + if (cp4.y < rect.lly) + rect.lly = cp4.y; + rect.ury = cp1.y; + if (cp2.y > rect.ury) + rect.ury = cp2.y; + if (cp3.y > rect.ury) + rect.ury = cp3.y; + if (cp4.y > rect.ury) + rect.ury = cp4.y; +#ifdef USE_QUADPOINTS + qpoints = pdf_new_array(); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp1.x, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp1.y, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp2.x, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp2.y, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp3.x, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp3.y, 0.01))); + pdf_add_array(qpoints, pdf_new_number(ROUND(cp4.x, 0.01))); + pdf_add_array(qpoints, 
pdf_new_number(ROUND(cp4.y, 0.01))); + pdf_add_dict(annot_dict, pdf_new_name("QuadPoints"), qpoints); +#endif } +#else + { + pdf_coord cp; + + cp.x = spe->x_user; cp.y = spe->y_user; + pdf_dev_transform(&cp, NULL); + if (ti.flags & INFO_HAS_USER_BBOX) { + rect.llx = ti.bbox.llx + cp.x; + rect.lly = ti.bbox.lly + cp.y; + rect.urx = ti.bbox.urx + cp.x; + rect.ury = ti.bbox.ury + cp.y; + } else { + rect.llx = cp.x; + rect.lly = cp.y - spe->mag * ti.depth; + rect.urx = cp.x + spe->mag * ti.width; + rect.ury = cp.y + spe->mag * ti.height; + } + } +#endif /* Order is important... */ if (ident) diff -Naur a/texk/dvipdfm-x/specials.c b/texk/dvipdfm-x/specials.c --- a/texk/dvipdfm-x/specials.c 2019-03-29 22:13:05.000000000 +0000 +++ b/texk/dvipdfm-x/specials.c 2019-05-31 22:00:04.008964031 +0100 @@ -169,12 +169,12 @@ switch (k) { /* xpos and ypos must be position in device space here. */ case K_OBJ__XPOS: - cp.x = dvi_dev_xpos(); cp.y = 0.0; + cp.x = dvi_dev_xpos(); cp.y = dvi_dev_ypos(); pdf_dev_transform(&cp, NULL); value = pdf_new_number(ROUND(cp.x, .01)); break; case K_OBJ__YPOS: - cp.x = 0.0; cp.y = dvi_dev_ypos(); + cp.x = dvi_dev_xpos(); cp.y = dvi_dev_ypos(); pdf_dev_transform(&cp, NULL); value = pdf_new_number(ROUND(cp.y, .01)); break; @@ -233,12 +233,12 @@ for (k = 0; _rkeys[k] && strcmp(key, _rkeys[k]); k++); switch (k) { case K_OBJ__XPOS: - cp.x = dvi_dev_xpos(); cp.y = 0.0; + cp.x = dvi_dev_xpos(); cp.y = dvi_dev_ypos(); pdf_dev_transform(&cp, NULL); value = pdf_new_number(ROUND(cp.x, .01)); break; case K_OBJ__YPOS: - cp.x = 0.0; cp.y = dvi_dev_ypos(); + cp.x = dvi_dev_xpos(); cp.y = dvi_dev_ypos(); pdf_dev_transform(&cp, NULL); value = pdf_new_number(ROUND(cp.y, .01)); break; diff -Naur a/texk/dvipdfm-x/tt_cmap.c b/texk/dvipdfm-x/tt_cmap.c --- a/texk/dvipdfm-x/tt_cmap.c 2019-02-25 10:27:33.000000000 +0000 +++ b/texk/dvipdfm-x/tt_cmap.c 2019-05-31 22:00:04.009964032 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. 
- Copyright (C) 2007-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. This program is free software; you can redistribute it and/or modify @@ -68,11 +68,12 @@ struct cmap0 *map; int i; - if (len < 256) - ERROR("invalid cmap subtable"); + if (len < 256) { + WARN("invalid format 0 TT cmap subtable"); + return NULL; + } map = NEW(1, struct cmap0); - for (i = 0; i < 256; i++) map->glyphIndexArray[i] = sfnt_get_byte(sfont); @@ -114,14 +115,14 @@ struct cmap2 *map; USHORT i, n; - if (len < 512) - ERROR("invalid cmap subtable"); - - map = NEW(1, struct cmap2); + if (len < 512) { + WARN("invalid fromt2 TT cmap subtable"); + return NULL; + } + map = NEW(1, struct cmap2); for (i = 0; i < 256; i++) map->subHeaderKeys[i] = sfnt_get_ushort(sfont); - for (n = 0, i = 0; i < 256; i++) { map->subHeaderKeys[i] /= 8; if (n < map->subHeaderKeys[i]) @@ -129,7 +130,13 @@ } n += 1; /* the number of subHeaders is one plus the max of subHeaderKeys */ - map->subHeaders = NEW(n, struct SubHeader); + if (len < 512 + n * 8 ) { + WARN("invalid/truncated format2 TT cmap subtable"); + RELEASE(map); + return NULL; + } + + map->subHeaders = NEW(n, struct SubHeader); for (i = 0; i < n; i++) { map->subHeaders[i].firstCode = sfnt_get_ushort(sfont); map->subHeaders[i].entryCount = sfnt_get_ushort(sfont); @@ -222,8 +229,10 @@ struct cmap4 *map; USHORT i, n, segCount; - if (len < 8) - ERROR("invalid cmap subtable"); + if (len < 8) { + WARN("invalid format 4 TT cmap subtable"); + return NULL; + } map = NEW(1, struct cmap4); @@ -288,19 +297,19 @@ * Last segment maps 0xffff to gid 0 (?) 
*/ i = segCount = map->segCountX2 / 2; - while (i-- > 0 && cc <= map->endCount[i]) { + while (i-- > 0 && cc <= map->endCount[i]) { if (cc >= map->startCount[i]) { if (map->idRangeOffset[i] == 0) { - gid = (cc + map->idDelta[i]) & 0xffff; + gid = (cc + map->idDelta[i]) & 0xffff; } else if (cc == 0xffff && map->idRangeOffset[i] == 0xffff) { - /* this is for protection against some old broken fonts... */ - gid = 0; + /* this is for protection against some old broken fonts... */ + gid = 0; } else { - j = map->idRangeOffset[i] - (segCount - i) * 2; - j = (cc - map->startCount[i]) + (j / 2); - gid = map->glyphIndexArray[j]; - if (gid != 0) - gid = (gid + map->idDelta[i]) & 0xffff; + j = map->idRangeOffset[i] - (segCount - i) * 2; + j = (cc - map->startCount[i]) + (j / 2); + gid = map->glyphIndexArray[j]; + if (gid != 0) + gid = (gid + map->idDelta[i]) & 0xffff; } break; } @@ -323,14 +332,15 @@ struct cmap6 *map; USHORT i; - if (len < 4) - ERROR("invalid cmap subtable"); + if (len < 4) { + WARN("invalid format 6 TT cmap subtable"); + return NULL; + } map = NEW(1, struct cmap6); map->firstCode = sfnt_get_ushort(sfont); map->entryCount = sfnt_get_ushort(sfont); - map->glyphIndexArray = NEW(map->entryCount, USHORT); - + map->glyphIndexArray = NEW(map->entryCount, USHORT); for (i = 0; i < map->entryCount; i++) map->glyphIndexArray[i] = sfnt_get_ushort(sfont); @@ -390,13 +400,14 @@ struct cmap12 *map; ULONG i; - if (len < 4) - ERROR("invalid cmap subtable"); + if (len < 4) { + WARN("invalid format 12 TT cmap subtable"); + return NULL; + } map = NEW(1, struct cmap12); map->nGroups = sfnt_get_ulong(sfont); map->groups = NEW(map->nGroups, struct charGroup); - for (i = 0; i < map->nGroups; i++) { map->groups[i].startCharCode = sfnt_get_ulong(sfont); map->groups[i].endCharCode = sfnt_get_ulong(sfont); @@ -427,8 +438,8 @@ cccc <= map->groups[i].endCharCode) { if (cccc >= map->groups[i].startCharCode) { gid = (USHORT) ((cccc - - map->groups[i].startCharCode + - 
map->groups[i].startGlyphID) & 0xffff); + map->groups[i].startCharCode + + map->groups[i].startGlyphID) & 0xffff); break; } } @@ -510,6 +521,7 @@ WARN("Unrecognized OpenType/TrueType cmap format."); tt_cmap_release(cmap); return NULL; + break; } if (!cmap->map) { @@ -526,24 +538,25 @@ if (cmap) { if (cmap->map) { - switch(cmap->format) { + switch (cmap->format) { case 0: - release_cmap0(cmap->map); - break; + release_cmap0(cmap->map); + break; case 2: - release_cmap2(cmap->map); - break; + release_cmap2(cmap->map); + break; case 4: - release_cmap4(cmap->map); - break; + release_cmap4(cmap->map); + break; case 6: - release_cmap6(cmap->map); - break; + release_cmap6(cmap->map); + break; case 12: - release_cmap12(cmap->map); - break; + release_cmap12(cmap->map); + break; default: - ERROR("Unrecognized OpenType/TrueType cmap format."); + WARN("Unrecognized OpenType/TrueType cmap format: %d", cmap->format); + break; } } RELEASE(cmap); @@ -582,129 +595,20 @@ gid = lookup_cmap12(cmap->map, (ULONG) cc); break; default: - ERROR("Unrecognized OpenType/TrueType cmap subtable format"); + WARN("Unrecognized OpenType/TrueType cmap subtable format: %d", cmap->format); break; } return gid; } -/* Sorry for placing this here. - * We need to rewrite TrueType font support code... 
- */ -#define WBUF_SIZE 1024 -static unsigned char wbuf[WBUF_SIZE]; static unsigned char srange_min[2] = {0x00, 0x00}; static unsigned char srange_max[2] = {0xff, 0xff}; static unsigned char lrange_min[4] = {0x00, 0x00, 0x00, 0x00}; static unsigned char lrange_max[4] = {0x7f, 0xff, 0xff, 0xff}; -static void -load_cmap4 (struct cmap4 *map, - unsigned char *GIDToCIDMap, - otl_gsub *gsub_vert, otl_gsub *gsub_list, - CMap *cmap, CMap *tounicode_add) -{ - USHORT c0, c1, gid, cid; - USHORT j, d, segCount; - USHORT ch; - int i; - - segCount = map->segCountX2 / 2; - for (i = segCount - 1; i >= 0 ; i--) { - c0 = map->startCount[i]; - c1 = map->endCount[i]; - d = map->idRangeOffset[i] / 2 - (segCount - i); - for (j = 0; j <= c1 - c0; j++) { - ch = c0 + j; - if (map->idRangeOffset[i] == 0) { - gid = (ch + map->idDelta[i]) & 0xffff; - } else if (c0 == 0xffff && c1 == 0xffff && - map->idRangeOffset[i] == 0xffff) { - /* this is for protection against some old broken fonts... */ - gid = 0; - } else { - gid = (map->glyphIndexArray[j+d] + map->idDelta[i]) & 0xffff; - } - if (gid != 0 && gid != 0xffff) { - if (gsub_list) - otl_gsub_apply_chain(gsub_list, &gid); - if (gsub_vert) - otl_gsub_apply(gsub_vert, &gid); - if (GIDToCIDMap) { - cid = ((GIDToCIDMap[2*gid] << 8)|GIDToCIDMap[2*gid+1]); - if (cid == 0) - WARN("GID %u does not have corresponding CID %u.", gid, cid); - } else { - cid = gid; - } - wbuf[0] = 0; - wbuf[1] = 0; - wbuf[2] = (ch >> 8) & 0xff; - wbuf[3] = ch & 0xff; - wbuf[4] = (cid >> 8) & 0xff; - wbuf[5] = cid & 0xff; - CMap_add_cidchar(cmap, wbuf, 4, cid); - if (tounicode_add) { - unsigned char *p = wbuf + 6; - size_t uc_len; - uc_len = UC_UTF16BE_encode_char(ch, &p, wbuf + WBUF_SIZE -1 ); - CMap_add_bfchar(tounicode_add, wbuf+4, 2, wbuf+6, uc_len); - } - } - } - } - - return; -} - -static void -load_cmap12 (struct cmap12 *map, - unsigned char *GIDToCIDMap, - otl_gsub *gsub_vert, otl_gsub *gsub_list, - CMap *cmap, CMap *tounicode_add) -{ - ULONG i, ch; /* LONG ? 
*/ - USHORT gid, cid; - - for (i = 0; i < map->nGroups; i++) { - for (ch = map->groups[i].startCharCode; - ch <= map->groups[i].endCharCode; - ch++) { - int d = ch - map->groups[i].startCharCode; - gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff); - if (gsub_list) - otl_gsub_apply_chain(gsub_list, &gid); - if (gsub_vert) - otl_gsub_apply(gsub_vert, &gid); - if (GIDToCIDMap) { - cid = ((GIDToCIDMap[2*gid] << 8)|GIDToCIDMap[2*gid+1]); - if (cid == 0) - WARN("GID %u does not have corresponding CID %u.", gid, cid); - } else { - cid = gid; - } - wbuf[0] = (ch >> 24) & 0xff; - wbuf[1] = (ch >> 16) & 0xff; - wbuf[2] = (ch >> 8) & 0xff; - wbuf[3] = ch & 0xff; - wbuf[4] = (cid >> 8) & 0xff; - wbuf[5] = cid & 0xff; - CMap_add_cidchar(cmap, wbuf, 4, cid); - if (tounicode_add) { - unsigned char *p = wbuf + 6; - size_t uc_len; - uc_len = UC_UTF16BE_encode_char(ch, &p, wbuf + WBUF_SIZE -1 ); - CMap_add_bfchar(tounicode_add, wbuf+4, 2, wbuf+6, uc_len); - } - } - } - - return; -} - /* OpenType CIDFont: * * We don't use GID for them. 
OpenType cmap table is for @@ -717,78 +621,37 @@ #include "cff_dict.h" #include "cff.h" -static int -handle_CIDFont (sfnt *sfont, - unsigned char **GIDToCIDMap, CIDSysInfo *csi) -{ - cff_font *cffont; - int offset, i; - card16 num_glyphs, gid; - cff_charsets *charset; - unsigned char *map; - struct tt_maxp_table *maxp; - - ASSERT(csi); - - offset = sfnt_find_table_pos(sfont, "CFF "); - if (offset == 0) { - csi->registry = NULL; - csi->ordering = NULL; - *GIDToCIDMap = NULL; - return 0; - } - - maxp = tt_read_maxp_table(sfont); - num_glyphs = (card16) maxp->numGlyphs; - RELEASE(maxp); - if (num_glyphs < 1) - ERROR("No glyph contained in this font..."); - - cffont = cff_open(sfont->stream, offset, 0); - if (!cffont) - ERROR("Could not open CFF font..."); - - - if (!(cffont->flag & FONTTYPE_CIDFONT)) { - cff_close(cffont); - csi->registry = NULL; - csi->ordering = NULL; - *GIDToCIDMap = NULL; - return 0; - } +/* This should be moved to cff.c */ +static void +create_GIDToCIDMap (uint16_t *GIDToCIDMap, uint16_t num_glyphs, cff_font *cffont) +{ + cff_charsets *charset; + uint16_t gid, i; - if (!cff_dict_known(cffont->topdict, "ROS")) { - ERROR("No CIDSystemInfo???"); - } else { - card16 reg, ord; + ASSERT(GIDToCIDMap); - reg = (card16) cff_dict_get(cffont->topdict, "ROS", 0); - ord = (card16) cff_dict_get(cffont->topdict, "ROS", 1); + if (!cffont || !(cffont->flag & FONTTYPE_CIDFONT)) { + for (gid = 0; gid < num_glyphs; gid++) { + GIDToCIDMap[gid] = gid; + } - csi->registry = cff_get_string(cffont, reg); - csi->ordering = cff_get_string(cffont, ord); - csi->supplement = (int) cff_dict_get(cffont->topdict, "ROS", 2); + return; } - cff_read_charsets(cffont); - charset = cffont->charsets; - if (!charset) { - ERROR("No CFF charset data???"); - } + memset(GIDToCIDMap, 0, num_glyphs*sizeof(uint16_t)); - map = NEW(65536 * 2, unsigned char); - memset(map, 0, 65536 * 2); + charset = cffont->charsets; + if (!charset) + return; switch (charset->format) { case 0: { s_SID *cids; /* 
CID... */ - + cids = charset->data.glyphs; - for (gid = 1, i = 0; - i < charset->num_entries; i++) { - map[2*gid ] = (cids[i] >> 8) & 0xff; - map[2*gid+1] = cids[i] & 0xff; - gid++; + for (gid = 1, i = 0; i < charset->num_entries; i++) { + GIDToCIDMap[gid] = cids[i]; + gid++; } } break; @@ -798,16 +661,14 @@ card16 cid, count; ranges = charset->data.range1; - for (gid = 1, i = 0; - i < charset->num_entries; i++) { - cid = ranges[i].first; - count = ranges[i].n_left + 1; /* card8 */ - while (count-- > 0 && - gid <= num_glyphs) { - map[2*gid ] = (cid >> 8) & 0xff; - map[2*gid + 1] = cid & 0xff; - gid++; cid++; - } + for (gid = 1, i = 0; i < charset->num_entries; i++) { + cid = ranges[i].first; + count = ranges[i].n_left + 1; /* card8 */ + while (count-- > 0 && gid <= num_glyphs) { + GIDToCIDMap[gid] = cid; + gid++; + cid++; + } } } break; @@ -817,55 +678,52 @@ card16 cid, count; ranges = charset->data.range2; - if (charset->num_entries == 1 && - ranges[0].first == 1) { - /* "Complete" CIDFont */ - RELEASE(map); map = NULL; + if (charset->num_entries == 1 && ranges[0].first == 1) { + /* "Complete" CIDFont */ + for (gid = 0; gid < num_glyphs; gid++) { + GIDToCIDMap[gid] = gid; + } } else { - /* Not trivial mapping */ - for (gid = 1, i = 0; - i < charset->num_entries; i++) { - cid = ranges[i].first; - count = ranges[i].n_left + 1; - while (count-- > 0 && - gid <= num_glyphs) { - map[2*gid] = (cid >> 8) & 0xff; - map[2*gid+1] = cid & 0xff; - gid++; cid++; - } - } + /* Not trivial mapping */ + for (gid = 1, i = 0; i < charset->num_entries; i++) { + cid = ranges[i].first; + count = ranges[i].n_left + 1; + while (count-- > 0 && gid <= num_glyphs) { + GIDToCIDMap[gid] = cid; + gid++; + cid++; + } + } } } break; default: - RELEASE(map); map = NULL; - ERROR("Unknown CFF charset format...: %d", charset->format); + WARN("Unknown CFF charset format...: %d", charset->format); break; } - cff_close(cffont); - *GIDToCIDMap = map; - return 1; + return; } static int 
is_PUA_or_presentation (unsigned int uni) { /* Some of CJK Radicals Supplement and Kangxi Radicals * are commonly double encoded, lower the priority. + * CJK Compatibility Ideographs & Supplement added. */ return ((uni >= 0x2E80 && uni <= 0x2EF3) || (uni >= 0x2F00 && uni <= 0x2FD5) || (uni >= 0xE000 && uni <= 0xF8FF) || (uni >= 0xFB00 && uni <= 0xFB4F) || + (uni >= 0xF900 && uni <= 0xFAFF) || (uni >= 0x2F800 && uni <= 0x2FA1F) || (uni >= 0xF0000 && uni <= 0xFFFFD) || (uni >= 0x100000 && uni <= 0x10FFFD)); } -static char* -sfnt_get_glyphname(struct tt_post_table *post, cff_font *cffont, USHORT gid) +static char * +lookup_glyph_name (struct tt_post_table *post, cff_font *cffont, USHORT gid) { - char* name = NULL; + char *name = NULL; if (post) name = tt_get_glyphname(post, gid); - if (!name && cffont) name = cff_get_glyphname(cffont, gid); @@ -881,94 +739,82 @@ #define is_used_char2(b,c) (((b)[(c)/8]) & (1 << (7-((c)%8)))) #endif -static USHORT -handle_subst_glyphs (CMap *cmap, - CMap *cmap_add, - const char *used_glyphs, - sfnt *sfont, - cff_font *cffont) +static int32_t +handle_subst_glyphs (CMap *cmap, CMap *cmap_add, char *used_chars) { - USHORT count; - USHORT i; - struct tt_post_table *post = NULL; - - if (!cmap_add) - post = tt_read_post_table(sfont); - - for (count = 0, i = 0; i < 8192; i++) { - int j; - int32_t len; - int inbytesleft, outbytesleft; - const unsigned char *inbuf; - unsigned char *outbuf; + int32_t count = 0; + int32_t cid; - if (used_glyphs[i] == 0) + for (cid = 0; cid < 65536; cid++) { + if (!is_used_char2(used_chars, cid)) continue; + else { + unsigned char buf[256]; + int inbytesleft = 2, outbytesleft = 254; + size_t len; + unsigned char *outbuf = buf + 2; + const unsigned char *inbuf = buf; + + buf[0] = (cid >> 8) & 0xff; + buf[1] = cid & 0xff; + CMap_decode(cmap_add, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + if (inbytesleft == 0) { + len = 254 - outbytesleft; + CMap_add_bfchar(cmap, buf, 2, buf + 2, len); + used_chars[cid / 8] &= 
~(1 << (7 - (cid % 8))); + count++; + } + } + } - for (j = 0; j < 8; j++) { - USHORT gid = 8 * i + j; + return count; +} - if (!is_used_char2(used_glyphs, gid)) - continue; +static int32_t +add_ToUnicode_via_glyph_name (CMap *cmap, char *used_chars, USHORT num_glyphs, + uint16_t *GIDToCIDMap, + sfnt *sfont, cff_font *cffont) +{ + int32_t count = 0; + USHORT gid; + struct tt_post_table *post = NULL; - if (!cmap_add) { -#define MAX_UNICODES 16 - /* try to look up Unicode values from the glyph name... */ - char* name; - int32_t unicodes[MAX_UNICODES]; - int unicode_count = -1; - name = sfnt_get_glyphname(post, cffont, gid); - if (name) { - unicode_count = agl_get_unicodes(name, unicodes, MAX_UNICODES); - } + post = tt_read_post_table(sfont); + if (!post && !cffont) + return count; + + for (gid = 0; gid < num_glyphs; gid++) { + uint16_t cid = GIDToCIDMap[gid]; + if (is_used_char2(used_chars, cid)) { +#define MAX_UNICODES 32 + char *name; + int32_t unicodes[MAX_UNICODES]; + int unicode_count = -1; + + name = lookup_glyph_name(post, cffont, gid); + if (name) { + unicode_count = agl_get_unicodes(name, unicodes, MAX_UNICODES); #undef MAX_UNICODES - if (unicode_count == -1) { - if(dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { - if (name) - MESG("No Unicode mapping available: GID=%u, name=%s\n", gid, name); - else - MESG("No Unicode mapping available: GID=%u\n", gid); - } - } else { - /* the Unicode characters go into wbuf[2] and following, in UTF16BE */ - /* we rely on WBUF_SIZE being more than adequate for MAX_UNICODES */ - unsigned char* p = wbuf + 2; - int k; - len = 0; + RELEASE(name); + if (unicode_count > 0) { + unsigned char *buf; + unsigned char *p, *endptr; + int k; + size_t len = 0; + + buf = NEW(unicode_count*4+2, unsigned char); + p = buf + 2; + endptr = buf + (unicode_count * 4 + 2); for (k = 0; k < unicode_count; ++k) { - len += UC_UTF16BE_encode_char(unicodes[k], &p, wbuf+WBUF_SIZE); + len += UC_UTF16BE_encode_char(unicodes[k], &p, endptr); } - wbuf[0] = (gid 
>> 8) & 0xff; - wbuf[1] = gid & 0xff; - CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len); - } - RELEASE(name); - } else { - wbuf[0] = (gid >> 8) & 0xff; - wbuf[1] = gid & 0xff; - - inbuf = wbuf; - inbytesleft = 2; - outbuf = wbuf + 2; - outbytesleft = WBUF_SIZE - 2; - CMap_decode(cmap_add, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - - if (inbytesleft != 0) { - WARN("CMap conversion failed..."); - } else { - len = WBUF_SIZE - 2 - outbytesleft; - CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len); + buf[0] = (cid >> 8) & 0xff; + buf[1] = cid & 0xff; + CMap_add_bfchar(cmap, buf, 2, buf + 2, len); + used_chars[cid / 8] &= ~(1 << (7 - (cid % 8))); count++; - if (dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { - int _i; - - MESG("otf_cmap>> Additional ToUnicode mapping: <%04X> <", gid); - for (_i = 0; _i < len; _i++) { - MESG("%02X", wbuf[2 + _i]); - } - MESG(">\n"); - } + RELEASE(buf); } } } @@ -980,70 +826,11 @@ return count; } -static cff_font * -prepare_CIDFont_from_sfnt(sfnt* sfont) -{ - cff_font *cffont; - unsigned offset = 0; - - if (sfont->type != SFNT_TYPE_POSTSCRIPT || - sfnt_read_table_directory(sfont, 0) < 0 || - (offset = sfnt_find_table_pos(sfont, "CFF ")) == 0) { - return NULL; - } - - cffont = cff_open(sfont->stream, offset, 0); - if (!cffont) - return NULL; - - cff_read_charsets(cffont); - return cffont; -} - -static USHORT -add_to_cmap_if_used (CMap *cmap, - cff_font *cffont, - char *used_chars, - USHORT gid, - ULONG ch) -{ - USHORT count = 0; - USHORT cid = cffont ? cff_charsets_lookup_inverse(cffont, gid) : gid; - if (is_used_char2(used_chars, cid)) { - int len; - unsigned char *p = wbuf + 2; - - count++; - - wbuf[0] = (cid >> 8) & 0xff; - wbuf[1] = (cid & 0xff); - len = UC_UTF16BE_encode_char((int32_t) ch, &p, wbuf + WBUF_SIZE); - CMap_add_bfchar(cmap, wbuf, 2, wbuf + 2, len); - - /* Skip PUA characters and alphabetic presentation forms, allowing - * handle_subst_glyphs() as it might find better mapping. 
Fixes the - * mapping of ligatures encoded in PUA in fonts like Linux Libertine - * and old Adobe fonts. - */ - if (!is_PUA_or_presentation(ch)) { - /* Avoid duplicate entry - * There are problem when two Unicode code is mapped to - * single glyph... - */ - used_chars[cid / 8] &= ~(1 << (7 - (cid % 8))); - } - } - - return count; -} - -static USHORT -create_ToUnicode_cmap4 (CMap *cmap, - struct cmap4 *map, - char *used_chars, - cff_font *cffont) +static void +create_inverse_cmap4 (int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + struct cmap4 *map) { - USHORT count = 0, segCount = map->segCountX2 / 2; + USHORT segCount = map->segCountX2 / 2; USHORT i, j; for (i = 0; i < segCount; i++) { @@ -1062,32 +849,33 @@ } else { gid = (map->glyphIndexArray[j + d] + map->idDelta[i]) & 0xffff; } - - count += add_to_cmap_if_used(cmap, cffont, used_chars, gid, ch); + if (is_PUA_or_presentation(ch)) { + map_sub[gid] = ch; + } else { + map_base[gid] = ch; + } } } - - return count; } -static USHORT -create_ToUnicode_cmap12 (CMap *cmap, - struct cmap12 *map, - char *used_chars, - cff_font *cffont) +static void +create_inverse_cmap12 (int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + struct cmap12 *map) { - ULONG i, ch, count = 0; + ULONG i, ch; for (i = 0; i < map->nGroups; i++) { for (ch = map->groups[i].startCharCode; ch <= map->groups[i].endCharCode; ch++) { int d = ch - map->groups[i].startCharCode; USHORT gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff); - count += add_to_cmap_if_used(cmap, cffont, used_chars, gid, ch); + if (is_PUA_or_presentation(ch)) { + map_sub[gid] = ch; + } else { + map_base[gid] = ch; + } } } - - return count; } /* NOTE: Reverse mapping code which had been placed here is removed since: @@ -1096,60 +884,163 @@ * Especially, the second one causes problems. 
*/ static pdf_obj * -create_ToUnicode_cmap (tt_cmap *ttcmap, +create_ToUnicode_cmap (tt_cmap *ttcmap, const char *cmap_name, - CMap *cmap_add, + CMap *cmap_add, const char *used_chars, - sfnt *sfont) + sfnt *sfont) { - pdf_obj *stream = NULL; - CMap *cmap; - USHORT count = 0; - cff_font *cffont = prepare_CIDFont_from_sfnt(sfont); - char is_cidfont = cffont && (cffont->flag & FONTTYPE_CIDFONT); - - cmap = CMap_new(); - CMap_set_name (cmap, cmap_name); - CMap_set_wmode(cmap, 0); - CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE); - CMap_set_CIDSysInfo(cmap, &CSI_UNICODE); - CMap_add_codespacerange(cmap, srange_min, srange_max, 2); + pdf_obj *stream = NULL; + int32_t *map_base = NULL, *map_sub = NULL; + USHORT gid, num_glyphs = 0; - /* cmap_add here stores information about all unencoded glyphs which can be - * accessed only through OT Layout GSUB table. - */ - { - char used_chars_copy[8192]; - memcpy(used_chars_copy, used_chars, 8192); + ASSERT(ttcmap); - /* For create_ToUnicode_cmap{4,12}(), cffont is for GID -> CID lookup, - * so it is only needed for CID fonts. */ - switch (ttcmap->format) { - case 4: - count = create_ToUnicode_cmap4(cmap, ttcmap->map, used_chars_copy, - is_cidfont ? cffont : NULL); - break; - case 12: - count = create_ToUnicode_cmap12(cmap, ttcmap->map, used_chars_copy, - is_cidfont ? cffont : NULL); - break; + /* Get num_glyphs from maxp table */ + { + struct tt_maxp_table *maxp; + + maxp = tt_read_maxp_table(sfont); + if (maxp) { + num_glyphs = maxp->numGlyphs; + RELEASE(maxp); } + } - /* For handle_subst_glyphs(), cffont is for GID -> glyph name lookup, so - * it is only needed for non-CID fonts. */ - count += handle_subst_glyphs(cmap, cmap_add, used_chars_copy, sfont, - is_cidfont ? 
NULL : cffont); + /* Initialize GID to Unicode mapping table */ + map_base = NEW(num_glyphs, int32_t); + map_sub = NEW(num_glyphs, int32_t); + for (gid = 0; gid < num_glyphs; gid++) { + map_base[gid] = -1; + map_sub [gid] = -1; } - if (count < 1) - stream = NULL; - else { - stream = CMap_create_stream(cmap); + /* Create "base" mapping from inverse mapping of OpenType cmap */ + switch (ttcmap->format) { + case 4: + create_inverse_cmap4(map_base, map_sub, num_glyphs, ttcmap->map); + break; + case 12: + create_inverse_cmap12(map_base, map_sub, num_glyphs, ttcmap->map); + break; } - CMap_release(cmap); - if (cffont) - cff_close(cffont); + /* Now create ToUnicode CMap stream */ + { + CMap *cmap; + int32_t count; + cff_font *cffont = NULL; + char is_cidfont = 0; + uint16_t *GIDToCIDMap = NULL; + char *used_chars_copy = NULL; + + if (sfont->type == SFNT_TYPE_POSTSCRIPT) { + ULONG offset; + offset = sfnt_find_table_pos(sfont, "CFF "); + cffont = cff_open(sfont->stream, offset, 0); + cff_read_charsets(cffont); + } + is_cidfont = cffont && (cffont->flag & FONTTYPE_CIDFONT); + + /* GIT to CID mapping info. 
*/ + GIDToCIDMap = NEW(num_glyphs, uint16_t); + if (is_cidfont) { + create_GIDToCIDMap(GIDToCIDMap, num_glyphs, cffont); + } else { + for (gid = 0; gid < num_glyphs; gid++) { + GIDToCIDMap[gid] = gid; + } + } + cmap = CMap_new(); + CMap_set_name (cmap, cmap_name); + CMap_set_wmode(cmap, 0); + CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE); + CMap_set_CIDSysInfo(cmap, &CSI_UNICODE); + CMap_add_codespacerange(cmap, srange_min, srange_max, 2); + + count = 0; + used_chars_copy = NEW(8192, char); + memcpy(used_chars_copy, used_chars, 8192); + for (gid = 0; gid < num_glyphs; gid++) { + uint16_t cid = GIDToCIDMap[gid]; + if (is_used_char2(used_chars_copy, cid)) { + int32_t ch; + unsigned char src[2], dst[4]; + unsigned char *p = dst, *endptr = dst + 4; + size_t len; + + ch = map_base[gid]; + if (UC_is_valid(ch)) { + src[0] = (cid >> 8) & 0xff; + src[1] = cid & 0xff; + len = UC_UTF16BE_encode_char(ch, &p, endptr); + CMap_add_bfchar(cmap, src, 2, dst, len); + used_chars_copy[cid / 8] &= ~(1 << (7 - (cid % 8))); + count++; + } + } + } + + /* cmap_add here stores information about all unencoded glyphs which can be + * accessed only through OT Layout GSUB table. + * This is only available when encoding is "unicode". + */ + if (cmap_add) { + count += handle_subst_glyphs(cmap, cmap_add, used_chars_copy); + } else { + /* Else, try gathering information from GSUB tables */ + count += otl_gsub_add_ToUnicode(cmap, used_chars_copy, + map_base, map_sub, num_glyphs, + GIDToCIDMap, sfont); + } + /* Find Unicode mapping via PostScript glyph names... */ + count += add_ToUnicode_via_glyph_name(cmap, used_chars_copy, num_glyphs, + GIDToCIDMap, sfont, is_cidfont ? NULL : cffont); + if (cffont) + cff_close(cffont); + + /* Finally, PUA and presentation forms... 
*/ + for (gid = 0; gid < num_glyphs; gid++) { + uint16_t cid = GIDToCIDMap[gid]; + if (is_used_char2(used_chars_copy, cid)) { + int32_t ch; + unsigned char src[2], dst[4]; + unsigned char *p = dst, *endptr = dst + 4; + size_t len; + + ch = map_sub[gid]; + if (UC_is_valid(ch)) { + src[0] = (cid >> 8) & 0xff; + src[1] = cid & 0xff; + len = UC_UTF16BE_encode_char(ch, &p, endptr); + CMap_add_bfchar(cmap, src, 2, dst, len); + used_chars_copy[cid / 8] &= ~(1 << (7 - (cid % 8))); + count++; + } + } + } + + /* Check for missing mapping */ + if (dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { + for (gid = 0; gid < num_glyphs; gid++) { + uint16_t cid = GIDToCIDMap[gid]; + if (is_used_char2(used_chars_copy, cid)) { + WARN("Unable to find ToUnicode mapping for glyph CID=%u (GID=%u)", cid, gid); + } + } + } + RELEASE(GIDToCIDMap); + RELEASE(used_chars_copy); + + if (count < 1) + stream = NULL; + else { + stream = CMap_create_stream(cmap); + } + CMap_release(cmap); + } + RELEASE(map_base); + RELEASE(map_sub); return stream; } @@ -1169,29 +1060,27 @@ pdf_obj * otf_create_ToUnicode_stream (const char *font_name, - int ttc_index, /* 0 for non-TTC */ + int ttc_index, /* 0 for non-TTC */ const char *basefont, const char *used_chars) { - pdf_obj *cmap_ref = NULL; - int res_id; - pdf_obj *cmap_obj = NULL; - CMap *cmap_add; - int cmap_add_id; - tt_cmap *ttcmap; - char *cmap_name, *cmap_add_name; - FILE *fp = NULL; - sfnt *sfont; - ULONG offset = 0; - int i; + pdf_obj *cmap_ref = NULL; /* returned value */ + CMap *cmap_add = NULL; + char *cmap_name; + FILE *fp = NULL; + sfnt *sfont; + ULONG offset = 0; + tt_cmap *ttcmap; + int cmap_id, cmap_add_id; + int i; cmap_name = NEW(strlen(basefont)+strlen("-UTF16")+1, char); sprintf(cmap_name, "%s-UTF16", basefont); - res_id = pdf_findresource("CMap", cmap_name); - if (res_id >= 0) { + cmap_id = pdf_findresource("CMap", cmap_name); + if (cmap_id >= 0) { RELEASE(cmap_name); - cmap_ref = pdf_get_resource_reference(res_id); + cmap_ref = 
pdf_get_resource_reference(cmap_id); return cmap_ref; } @@ -1212,7 +1101,10 @@ } if (!sfont) { - ERROR("Could not open OpenType/TrueType font file \"%s\"", font_name); + WARN("Could not open OpenType/TrueType font file \"%s\"", font_name); + RELEASE(cmap_name); + DPXFCLOSE(fp); + return NULL; } switch (sfont->type) { @@ -1222,7 +1114,11 @@ case SFNT_TYPE_TTC: offset = ttc_read_offset(sfont, ttc_index); if (offset == 0) { - ERROR("Invalid TTC index"); + WARN("Invalid TTC index for font: %s", font_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + RELEASE(cmap_name); + return NULL; } break; default: @@ -1231,111 +1127,180 @@ } if (sfnt_read_table_directory(sfont, offset) < 0) { - ERROR("Could not read OpenType/TrueType table directory."); + WARN("Could not read OpenType/TrueType table directory: %s", font_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + RELEASE(cmap_name); + return NULL; } - cmap_add_name = NEW(strlen(font_name)+strlen(",000-UCS32-Add")+1, char); - sprintf(cmap_add_name, "%s,%03d-UCS32-Add", font_name, ttc_index); - cmap_add_id = CMap_cache_find(cmap_add_name); - RELEASE(cmap_add_name); - if (cmap_add_id < 0) { - cmap_add = NULL; - } else { - cmap_add = CMap_cache_get(cmap_add_id); + /* cmap_add is used for storing information on ToUnicode mapping for + * unencoded glyphs which can be reached only through GSUB substitution. + * This is available only when "unicode" is specified in the encoding + * field of fontmap. We remember the inverse mapping via cmap_add in this + * case. + */ + { + char *cmap_add_name; + + cmap_add_name = NEW(strlen(font_name)+strlen(",000-UCS32-Add")+1, char); + sprintf(cmap_add_name, "%s,%03d-UCS32-Add", font_name, ttc_index); + cmap_add_id = CMap_cache_find(cmap_add_name); + RELEASE(cmap_add_name); + if (cmap_add_id < 0) { + cmap_add = NULL; + } else { + cmap_add = CMap_cache_get(cmap_add_id); + } } - CMap_set_silent(1); /* many warnings without this... 
*/ + ttcmap = NULL; for (i = 0; i < sizeof(cmap_plat_encs) / sizeof(cmap_plat_enc_rec); ++i) { ttcmap = tt_cmap_read(sfont, cmap_plat_encs[i].platform, cmap_plat_encs[i].encoding); if (!ttcmap) continue; if (ttcmap->format == 4 || ttcmap->format == 12) { - cmap_obj = create_ToUnicode_cmap(ttcmap, cmap_name, cmap_add, used_chars, sfont); break; + } else { + tt_cmap_release(ttcmap); + ttcmap = NULL; } } -#if defined(LIBDPX) - if (cmap_obj == NULL && dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) -#else - if (cmap_obj == NULL) -#endif /* LIBDPX */ - WARN("Unable to read OpenType/TrueType Unicode cmap table."); - tt_cmap_release(ttcmap); - CMap_set_silent(0); - - if (cmap_obj) { - res_id = pdf_defineresource("CMap", cmap_name, - cmap_obj, PDF_RES_FLUSH_IMMEDIATE); - cmap_ref = pdf_get_resource_reference(res_id); - } else { - cmap_ref = NULL; + if (ttcmap) { + pdf_obj *cmap_obj; + + CMap_set_silent(1); /* many warnings without this... */ + cmap_obj = create_ToUnicode_cmap(ttcmap, cmap_name, cmap_add, used_chars, sfont); + CMap_set_silent(0); + if (cmap_obj) { + cmap_id = pdf_defineresource("CMap", cmap_name, + cmap_obj, PDF_RES_FLUSH_IMMEDIATE); + cmap_ref = pdf_get_resource_reference(cmap_id); + } + tt_cmap_release(ttcmap); } - RELEASE(cmap_name); + /* Cleanup */ + RELEASE(cmap_name); sfnt_close(sfont); - if (fp) - DPXFCLOSE(fp); + DPXFCLOSE(fp); + +#ifndef LIBDPX + if (!cmap_ref) { + WARN("Creating ToUnicode CMap failed for \"%s\"", font_name); + } +#endif return cmap_ref; } -static int -load_base_CMap (const char *cmap_name, CMap *tounicode_add, int wmode, - CIDSysInfo *csi, unsigned char *GIDToCIDMap, - otl_gsub *gsub_vert, otl_gsub *gsub_list, - tt_cmap *ttcmap) -{ - int cmap_id; - cmap_id = CMap_cache_find(cmap_name); - if (cmap_id < 0) { - CMap *cmap; +/* Creating input CMaps from OT cmap table */ - cmap = CMap_new(); - CMap_set_name (cmap, cmap_name); - CMap_set_type (cmap, CMAP_TYPE_CODE_TO_CID); - CMap_set_wmode(cmap, wmode); - CMap_add_codespacerange(cmap, 
lrange_min, lrange_max, 4); +static void +load_cmap4 (struct cmap4 *map, uint16_t *GIDToCIDMap, USHORT num_glyphs, + otl_gsub *gsub_vert, otl_gsub *gsub_list, + CMap *cmap, int32_t *map_base, int32_t *map_sub) +{ + USHORT c0, c1, gid, cid; + USHORT j, d, segCount; + USHORT ch; + int i; + unsigned char buf[4]; - if (csi) { /* CID */ - CMap_set_CIDSysInfo(cmap, csi); - } else { - CMap_set_CIDSysInfo(cmap, &CSI_IDENTITY); + segCount = map->segCountX2 / 2; + for (i = segCount - 1; i >= 0 ; i--) { + c0 = map->startCount[i]; + c1 = map->endCount[i]; + d = map->idRangeOffset[i] / 2 - (segCount - i); + for (j = 0; j <= c1 - c0; j++) { + ch = c0 + j; + if (map->idRangeOffset[i] == 0) { + gid = (ch + map->idDelta[i]) & 0xffff; + } else if (c0 == 0xffff && c1 == 0xffff && map->idRangeOffset[i] == 0xffff) { + /* this is for protection against some old broken fonts... */ + gid = 0; + } else { + gid = (map->glyphIndexArray[j+d] + map->idDelta[i]) & 0xffff; + } + if (gid != 0 && gid != 0xffff) { + /* Apply GSUB features */ + if (gsub_list) + otl_gsub_apply_chain(gsub_list, &gid); + if (gsub_vert) + otl_gsub_apply(gsub_vert, &gid); + cid = (gid < num_glyphs) ? 
GIDToCIDMap[gid] : 0; + buf[0] = 0; + buf[1] = 0; + buf[2] = (ch >> 8) & 0xff; + buf[3] = ch & 0xff; + CMap_add_cidchar(cmap, buf, 4, cid); + /* For ToUnicode creation */ + if (map_base && map_sub) { + if (is_PUA_or_presentation(ch)) { + map_sub[gid] = ch; + } else { + map_base[gid] = ch; + } + } + } } + } - if (ttcmap->format == 12) { - load_cmap12(ttcmap->map, GIDToCIDMap, gsub_vert, gsub_list, - cmap, tounicode_add); - } else if (ttcmap->format == 4) { - load_cmap4(ttcmap->map, GIDToCIDMap, gsub_vert, gsub_list, - cmap, tounicode_add); - } + return; +} - cmap_id = CMap_cache_add(cmap); +static void +load_cmap12 (struct cmap12 *map, uint16_t *GIDToCIDMap, USHORT num_glyphs, + otl_gsub *gsub_vert, otl_gsub *gsub_list, + CMap *cmap, int32_t *map_base, int32_t *map_sub) +{ + ULONG i, ch; + USHORT gid, cid; + unsigned char buf[4]; + + for (i = 0; i < map->nGroups; i++) { + for (ch = map->groups[i].startCharCode; + ch <= map->groups[i].endCharCode; ch++) { + int d = ch - map->groups[i].startCharCode; + gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff); + if (gsub_list) + otl_gsub_apply_chain(gsub_list, &gid); + if (gsub_vert) + otl_gsub_apply(gsub_vert, &gid); + cid = (gid < num_glyphs) ? 
GIDToCIDMap[gid] : 0; + buf[0] = (ch >> 24) & 0xff; + buf[1] = (ch >> 16) & 0xff; + buf[2] = (ch >> 8) & 0xff; + buf[3] = ch & 0xff; + CMap_add_cidchar(cmap, buf, 4, cid); + if (map_base && map_sub) { + if (is_PUA_or_presentation(ch)) { + map_sub[gid] = ch; + } else { + map_base[gid] = ch; + } + } + } } - return cmap_id; + return; } int otf_load_Unicode_CMap (const char *map_name, int ttc_index, /* 0 for non-TTC font */ - const char *otl_tags, int wmode) + const char *otl_tags, int wmode) { - int cmap_id = -1; - /* Additional ToUncidoe mappings required by OTL GSUB substitusion */ - int tounicode_add_id = -1; - CMap *tounicode_add = NULL; - char *tounicode_add_name = NULL; - int is_cidfont = 0; - sfnt *sfont; - ULONG offset = 0; - char *cmap_name = NULL; - FILE *fp = NULL; - otl_gsub *gsub_vert = NULL, *gsub_list = NULL; - tt_cmap *ttcmap; - CIDSysInfo csi = {NULL, NULL, 0}; - unsigned char *GIDToCIDMap = NULL; + int cmap_id = -1; + char *cmap_name = NULL; + sfnt *sfont = NULL; + ULONG offset = 0; + uint16_t num_glyphs = 0; + FILE *fp = NULL; + tt_cmap *ttcmap = NULL; + CIDSysInfo csi = {NULL, NULL, 0}; + uint16_t *GIDToCIDMap = NULL; if (!map_name) return -1; @@ -1359,11 +1324,6 @@ sprintf(cmap_name, "%s,%03d-UCS4-H", map_name, ttc_index); } } - if (dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { - MESG("\n"); - MESG("otf_cmap>> Unicode charmap for font=\"%s\" layout=\"%s\"\n", - map_name, (otl_tags ? otl_tags : "none")); - } cmap_id = CMap_cache_find(cmap_name); if (cmap_id >= 0) { RELEASE(cmap_name); @@ -1374,6 +1334,12 @@ } /* CMap not found */ + if (dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { + MESG("\n"); + MESG("otf_cmap>> Creating Unicode charmap for font=\"%s\" layout=\"%s\"\n", + map_name, (otl_tags ? 
otl_tags : "none")); + } + fp = DPXFOPEN(map_name, DPX_RES_TYPE_TTFONT); if (!fp) { fp = DPXFOPEN(map_name, DPX_RES_TYPE_OTFONT); @@ -1390,13 +1356,20 @@ } if (!sfont) { - ERROR("Could not open OpenType/TrueType/dfont font file \"%s\"", map_name); + WARN("Could not open OpenType/TrueType/dfont font file \"%s\"", map_name); + RELEASE(cmap_name); + DPXFCLOSE(fp); + return -1; } switch (sfont->type) { case SFNT_TYPE_TTC: offset = ttc_read_offset(sfont, ttc_index); if (offset == 0) { - ERROR("Invalid TTC index"); + WARN("Offset=0 returned for font=%s, TTC_index=%d", map_name, ttc_index); + RELEASE(cmap_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + return -1; } break; case SFNT_TYPE_TRUETYPE: @@ -1407,41 +1380,79 @@ offset = sfont->offset; break; default: - ERROR("Not a OpenType/TrueType/TTC font?: %s", map_name); + WARN("Not a OpenType/TrueType/TTC font?: %s", map_name); + RELEASE(cmap_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + return -1; break; } - if (sfnt_read_table_directory(sfont, offset) < 0) - ERROR("Could not read OpenType/TrueType table directory."); + if (sfnt_read_table_directory(sfont, offset) < 0) { + WARN("Could not read OpenType/TrueType table directory: %s", map_name); + RELEASE(cmap_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + return -1; + } + { + struct tt_maxp_table *maxp; - if (otl_tags) { - /* tounicode_add here is later refered by otf_create_ToUnicode_stream() - * for finding additional CID to Unicode mapping entries required by - * OTL gsub substitution. 
- */ - tounicode_add_name = NEW(strlen(map_name)+strlen(",000-UCS32-Add")+1, char); - sprintf(tounicode_add_name, "%s,%03d-UCS32-Add", map_name, ttc_index); - tounicode_add_id = CMap_cache_find(tounicode_add_name); - if (tounicode_add_id >= 0) - tounicode_add = CMap_cache_get(tounicode_add_id); - else { - tounicode_add = CMap_new(); - CMap_set_name (tounicode_add, tounicode_add_name); - CMap_set_type (tounicode_add, CMAP_TYPE_TO_UNICODE); - CMap_set_wmode(tounicode_add, 0); - CMap_add_codespacerange(tounicode_add, srange_min, srange_max, 2); - CMap_set_CIDSysInfo(tounicode_add, &CSI_UNICODE); - CMap_add_bfchar(tounicode_add, srange_min, 2, srange_max, 2); - tounicode_add_id = CMap_cache_add(tounicode_add); - } - RELEASE(tounicode_add_name); + maxp = tt_read_maxp_table(sfont); + num_glyphs = (card16) maxp->numGlyphs; + RELEASE(maxp); } + GIDToCIDMap = NEW(num_glyphs, uint16_t); + memset(GIDToCIDMap, 0, num_glyphs*sizeof(uint16_t)); if (sfont->type == SFNT_TYPE_POSTSCRIPT) { - is_cidfont = handle_CIDFont(sfont, &GIDToCIDMap, &csi); + cff_font *cffont; + card16 gid; + + offset = sfnt_find_table_pos(sfont, "CFF "); + cffont = cff_open(sfont->stream, offset, 0); + if (!cffont) { + RELEASE(cmap_name); + RELEASE(GIDToCIDMap); + sfnt_close(sfont); + DPXFCLOSE(fp); + return -1; + } + if (!(cffont->flag & FONTTYPE_CIDFONT)) { + csi.registry = strdup("Adobe"); + csi.ordering = strdup("Identity"); + csi.supplement = 0; + for (gid = 0; gid < num_glyphs; gid++) { + GIDToCIDMap[gid] = gid; + } + } else { + if (!cff_dict_known(cffont->topdict, "ROS")) { + csi.registry = strdup("Adobe"); + csi.ordering = strdup("Identity"); + csi.supplement = 0; + } else { + card16 reg, ord; + + reg = (card16) cff_dict_get(cffont->topdict, "ROS", 0); + ord = (card16) cff_dict_get(cffont->topdict, "ROS", 1); + csi.registry = cff_get_string(cffont, reg); + csi.ordering = cff_get_string(cffont, ord); + csi.supplement = (int) cff_dict_get(cffont->topdict, "ROS", 2); + } + cff_read_charsets(cffont); + 
create_GIDToCIDMap(GIDToCIDMap, num_glyphs, cffont); + } + cff_close(cffont); } else { - is_cidfont = 0; + uint16_t gid; + + csi.registry = strdup("Adobe"); + csi.ordering = strdup("Identity"); + csi.supplement = 0; + for (gid = 0; gid < num_glyphs; gid++) { + GIDToCIDMap[gid] = gid; + } } ttcmap = tt_cmap_read(sfont, 3, 10); /* Microsoft UCS4 */ @@ -1449,63 +1460,122 @@ ttcmap = tt_cmap_read(sfont, 3, 1); /* Microsoft UCS2 */ if (!ttcmap) { ttcmap = tt_cmap_read(sfont, 0, 3); /* Unicode 2.0 or later */ -#if defined(LIBDPX) - if (!ttcmap && dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { -#else - if (!ttcmap) { -#endif /* LIBDPX */ - ERROR("Unable to read OpenType/TrueType Unicode cmap table."); - } } } - if (wmode == 1) { - gsub_vert = otl_gsub_new(); - if (otl_gsub_add_feat(gsub_vert, "*", "*", "vrt2", sfont) < 0) { - if (otl_gsub_add_feat(gsub_vert, "*", "*", "vert", sfont) < 0) { - WARN("GSUB feature vrt2/vert not found."); - otl_gsub_release(gsub_vert); - gsub_vert = NULL; + + if (ttcmap) { + CMap *cmap = NULL; + int32_t *map_base, *map_sub; + otl_gsub *gsub_vert = NULL; + otl_gsub *gsub_list = NULL; + uint32_t gid; + + if (wmode == 1) { + gsub_vert = otl_gsub_new(); + if (otl_gsub_add_feat(gsub_vert, "*", "*", "vrt2", sfont) < 0) { + if (otl_gsub_add_feat(gsub_vert, "*", "*", "vert", sfont) < 0) { + WARN("GSUB feature vrt2/vert not found."); + otl_gsub_release(gsub_vert); + gsub_vert = NULL; + } else { + otl_gsub_select(gsub_vert, "*", "*", "vert"); + } } else { - otl_gsub_select(gsub_vert, "*", "*", "vert"); + otl_gsub_select(gsub_vert, "*", "*", "vrt2"); } } else { - otl_gsub_select(gsub_vert, "*", "*", "vrt2"); + gsub_vert = NULL; } - } else { - gsub_vert = NULL; - } - if (otl_tags) { - gsub_list = otl_gsub_new(); - if (otl_gsub_add_feat_list(gsub_list, otl_tags, sfont) < 0) { - WARN("Readin GSUB feature table(s) failed for \"%s\"", otl_tags); + if (otl_tags) { + gsub_list = otl_gsub_new(); + if (otl_gsub_add_feat_list(gsub_list, otl_tags, sfont) < 0) { + 
WARN("Reading GSUB feature table(s) failed for \"%s\"", otl_tags); + } else { + otl_gsub_set_chain(gsub_list, otl_tags); + } } else { - otl_gsub_set_chain(gsub_list, otl_tags); + gsub_list = NULL; } - } else { - gsub_list = NULL; + cmap = CMap_new(); + CMap_set_name(cmap, cmap_name); + CMap_set_type(cmap, CMAP_TYPE_CODE_TO_CID); + CMap_set_wmode(cmap, wmode); + CMap_add_codespacerange(cmap, lrange_min, lrange_max, 4); + CMap_set_CIDSysInfo(cmap, &csi); + map_base = NEW(num_glyphs, int32_t); + map_sub = NEW(num_glyphs, int32_t); + for (gid = 0; gid < num_glyphs; gid++) { + map_base[gid] = -1; + map_sub[gid] = -1; + } + switch (ttcmap->format) { + case 12: + load_cmap12(ttcmap->map, GIDToCIDMap, num_glyphs, + gsub_vert, gsub_list, + cmap, map_base, map_sub); + break; + case 4: + load_cmap4(ttcmap->map, GIDToCIDMap, num_glyphs, + gsub_vert, gsub_list, + cmap, map_base, map_sub); + break; + } + if (gsub_vert) + otl_gsub_release(gsub_vert); + if (gsub_list) + otl_gsub_release(gsub_list); + tt_cmap_release(ttcmap); + + if (otl_tags) { + CMap *tounicode = NULL; + char *tounicode_name; + int tounicode_id; + + tounicode_name = NEW(strlen(map_name)+strlen(",000-UCS32-Add")+1, char); + sprintf(tounicode_name, "%s,%03d-UCS32-Add", map_name, ttc_index); + tounicode_id = CMap_cache_find(tounicode_name); + if (tounicode_id >= 0) + tounicode = CMap_cache_get(tounicode_id); + else { + tounicode = CMap_new(); + CMap_set_name (tounicode, tounicode_name); + CMap_set_type (tounicode, CMAP_TYPE_TO_UNICODE); + CMap_set_wmode(tounicode, 0); + CMap_add_codespacerange(tounicode, srange_min, srange_max, 2); + CMap_set_CIDSysInfo(tounicode, &CSI_UNICODE); + CMap_add_bfchar(tounicode, srange_min, 2, srange_max, 2); + tounicode_id = CMap_cache_add(tounicode); + } + RELEASE(tounicode_name); + + for (gid = 0; gid < num_glyphs; gid++) { + uint16_t cid = GIDToCIDMap[gid]; + unsigned char src[2], dst[4]; + if (cid > 0) { + int32_t ch = UC_is_valid(map_base[gid]) ? 
map_base[gid] : map_sub[gid]; + if (UC_is_valid(ch)) { + unsigned char *p = dst; + unsigned char *endptr = dst + 4; + size_t len; + src[0] = (cid >> 8) & 0xff; + src[1] = cid & 0xff; + len = UC_UTF16BE_encode_char(ch, &p, endptr); + if (len > 0) { + CMap_add_bfchar(tounicode, src, 2, dst, len); + } + } + } + } + } + cmap_id = CMap_cache_add(cmap); } - cmap_id = load_base_CMap(cmap_name, tounicode_add, wmode, - (is_cidfont ? &csi : NULL), GIDToCIDMap, - gsub_vert, gsub_list, ttcmap); - if (cmap_id < 0) - ERROR("Failed to read OpenType/TrueType cmap table."); - if (gsub_vert) - otl_gsub_release(gsub_vert); - gsub_vert = NULL; - if (gsub_list) - otl_gsub_release(gsub_list); - gsub_list = NULL; RELEASE(cmap_name); - if (GIDToCIDMap) - RELEASE(GIDToCIDMap); - if (is_cidfont) { - if (csi.registry) - RELEASE(csi.registry); - if (csi.ordering) - RELEASE(csi.ordering); - } - tt_cmap_release(ttcmap); + RELEASE(GIDToCIDMap); + if (csi.registry) + RELEASE(csi.registry); + if (csi.ordering) + RELEASE(csi.ordering); sfnt_close(sfont); DPXFCLOSE(fp); @@ -1515,14 +1585,11 @@ int otf_try_load_GID_to_CID_map (const char *map_name, int ttc_index, int wmode) { - int cmap_id = -1; - sfnt *sfont; - ULONG offset = 0; - char *cmap_name = NULL; - FILE *fp = NULL; - CIDSysInfo csi = {NULL, NULL, 0}; - int is_cidfont = 0; - unsigned char *GIDToCIDMap = NULL; + int cmap_id = -1; + sfnt *sfont = NULL; + ULONG offset = 0; + char *cmap_name = NULL; + FILE *fp = NULL; if (!map_name) return -1; @@ -1559,13 +1626,20 @@ } if (!sfont) { - ERROR("Could not open OpenType/TrueType/dfont font file \"%s\"", map_name); + WARN("Could not open OpenType/TrueType/dfont font file \"%s\"", map_name); + RELEASE(cmap_name); + DPXFCLOSE(fp); + return -1; } switch (sfont->type) { case SFNT_TYPE_TTC: offset = ttc_read_offset(sfont, ttc_index); if (offset == 0) { - ERROR("Invalid TTC index"); + WARN("Invalid TTC index for font \"%s\": %d", map_name, ttc_index); + sfnt_close(sfont); + DPXFCLOSE(fp); + 
RELEASE(cmap_name); + return -1; } break; case SFNT_TYPE_TRUETYPE: @@ -1576,12 +1650,20 @@ offset = sfont->offset; break; default: - ERROR("Not a OpenType/TrueType/TTC font?: %s", map_name); - break; + WARN("Not a OpenType/TrueType/TTC font?: %s", map_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + RELEASE(cmap_name); + return -1; } - if (sfnt_read_table_directory(sfont, offset) < 0) - ERROR("Could not read OpenType/TrueType table directory."); + if (sfnt_read_table_directory(sfont, offset) < 0) { + WARN("Could not read OpenType/TrueType table directory: %s", map_name); + sfnt_close(sfont); + DPXFCLOSE(fp); + RELEASE(cmap_name); + return -1; + } if (sfont->type != SFNT_TYPE_POSTSCRIPT) { RELEASE(cmap_name); sfnt_close(sfont); @@ -1590,41 +1672,71 @@ } /* Read GID-to-CID mapping if CFF OpenType is found. */ - is_cidfont = handle_CIDFont(sfont, &GIDToCIDMap, &csi); - if (is_cidfont) { - if (GIDToCIDMap) { - CMap *cmap; - int32_t gid; - const unsigned char csrange[4] = {0x00, 0x00, 0xff, 0xff}; + if (sfont->type == SFNT_TYPE_POSTSCRIPT) { + cff_font *cffont; + struct tt_maxp_table *maxp; + const unsigned char csrange[4] = {0x00, 0x00, 0xff, 0xff}; + uint16_t num_glyphs = 0; + + maxp = tt_read_maxp_table(sfont); + num_glyphs = (card16) maxp->numGlyphs; + RELEASE(maxp); + + offset = sfnt_find_table_pos(sfont, "CFF "); + cffont = cff_open(sfont->stream, offset, 0); + if (cffont && cffont->flag & FONTTYPE_CIDFONT) { + CMap *cmap; + uint16_t gid; + uint16_t *GIDToCIDMap = NULL; + CIDSysInfo csi = {NULL, NULL, 0}; + + if (!cff_dict_known(cffont->topdict, "ROS")) { + csi.registry = strdup("Adobe"); + csi.ordering = strdup("Identity"); + csi.supplement = 0; + } else { + card16 reg, ord; + reg = (card16) cff_dict_get(cffont->topdict, "ROS", 0); + ord = (card16) cff_dict_get(cffont->topdict, "ROS", 1); + csi.registry = cff_get_string(cffont, reg); + csi.ordering = cff_get_string(cffont, ord); + csi.supplement = (int) cff_dict_get(cffont->topdict, "ROS", 2); + } + 
cff_read_charsets(cffont); + GIDToCIDMap = NEW(num_glyphs, uint16_t); + memset(GIDToCIDMap, 0, num_glyphs*sizeof(uint16_t)); + create_GIDToCIDMap(GIDToCIDMap, num_glyphs, cffont); cmap = CMap_new(); CMap_set_name (cmap, cmap_name); CMap_set_type (cmap, CMAP_TYPE_CODE_TO_CID); CMap_set_wmode(cmap, wmode); CMap_add_codespacerange(cmap, &csrange[0], &csrange[2], 2); CMap_set_CIDSysInfo(cmap, &csi); - - for (gid = 0; gid < 65536; gid++) { - unsigned char src[2]; + for (gid = 0; gid < num_glyphs; gid++) { + unsigned char src[2], dst[2]; src[0] = (gid >> 8) & 0xff; src[1] = gid & 0xff; - CMap_add_bfchar(cmap, src, 2, &GIDToCIDMap[gid*2], 2); + dst[0] = (GIDToCIDMap[gid] >> 8) & 0xff; + dst[1] = GIDToCIDMap[gid] & 0xff; + CMap_add_bfchar(cmap, src, 2, dst, 2); } cmap_id = CMap_cache_add(cmap); if (dpx_conf.verbose_level > VERBOSE_LEVEL_MIN) { MESG("\n"); MESG("otf_cmap>> Creating GID-to-CID mapping for font=\"%s\"\n", map_name); } + RELEASE(GIDToCIDMap); + if (csi.registry) + RELEASE(csi.registry); + if (csi.ordering) + RELEASE(csi.ordering); } - /* Identity mapping for null GIDToCIDMap */ + if (cffont) + cff_close(cffont); } + RELEASE(cmap_name); - if (GIDToCIDMap) - RELEASE(GIDToCIDMap); - if (csi.registry) - RELEASE(csi.registry); - if (csi.ordering) - RELEASE(csi.ordering); sfnt_close(sfont); DPXFCLOSE(fp); diff -Naur a/texk/dvipdfm-x/tt_gsub.c b/texk/dvipdfm-x/tt_gsub.c --- a/texk/dvipdfm-x/tt_gsub.c 2018-12-21 03:39:51.000000000 +0000 +++ b/texk/dvipdfm-x/tt_gsub.c 2019-05-31 22:00:04.009964032 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2002-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. 
This program is free software; you can redistribute it and/or modify @@ -977,10 +977,11 @@ sfnt_seek_set(sfont, offset); clt_read_feature_table(&feature_table, sfont); +#if 0 if (feature_table.FeatureParams != 0) { ERROR("unrecognized FeatureParams"); } - +#endif /* Lookup table */ for (i = 0; i < feature_table.LookupListIndex.count; i++) { struct clt_lookup_table lookup_table; @@ -1680,98 +1681,303 @@ return retval; } -#if 0 +#if 1 +#include "unicode.h" + +#ifndef is_used_char2 +#define is_used_char2(b,c) (((b)[(c)/8]) & (1 << (7-((c)%8)))) +#endif + +static int +add_glyph_if_valid (CMap *cmap, char *used_chars, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap, USHORT gid, USHORT gid_sub) +{ + int count = 0; + unsigned char src[2], dst[4]; + unsigned char *p = dst, *endptr = dst + 4; + size_t len; + uint16_t cid_sub; + + if (gid_sub >= num_glyphs || gid >= num_glyphs) + return 0; + + cid_sub = GIDToCIDMap[gid_sub]; + if (is_used_char2(used_chars, cid_sub)) { + int32_t ch = map_base[gid]; + if (UC_is_valid(ch)) { + src[0] = (cid_sub >> 8) & 0xff; + src[1] = cid_sub & 0xff; + len = UC_UTF16BE_encode_char(ch, &p, endptr); + CMap_add_bfchar(cmap, src, 2, dst, len); + used_chars[cid_sub / 8] &= ~(1 << (7 - (cid_sub % 8))); + count = 1; + } else { + ch = map_sub[gid]; + if (UC_is_valid(ch)) { + src[0] = (cid_sub >> 8) & 0xff; + src[1] = cid_sub & 0xff; + len = UC_UTF16BE_encode_char(ch, &p, endptr); + CMap_add_bfchar(cmap, src, 2, dst, len); + used_chars[cid_sub / 8] &= ~(1 << (7 - (cid_sub % 8))); + count = 1; + } + } + } + return count; +} + static int -otl_gsub_dump_single (struct otl_gsub_subtab *subtab) +add_ToUnicode_single (CMap *cmap, char *used_chars, + struct otl_gsub_subtab *subtab, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap) { - int gid, idx; + int count = 0; + USHORT i, idx, gid; + USHORT gid_sub; ASSERT(subtab); if (subtab->SubstFormat == 1) { struct otl_gsub_single1 *data; + struct 
clt_coverage *cov; data = (subtab->table).single1; - for (gid = 0; gid < 0x10000; gid++) { - idx = clt_lookup_coverage(&data->coverage, gid); - if (idx >= 0) { - fprintf(stdout, "substitute \\%u by \\%u;\n", - (USHORT) gid, (USHORT) (gid + data->DeltaGlyphID)); + cov = &data->coverage; + switch (cov->format) { + case 1: /* list */ + for (idx = 0; idx < cov->count; idx++) { + gid = cov->list[idx]; + gid_sub = gid + data->DeltaGlyphID; + count += add_glyph_if_valid(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, gid_sub); } + break; + case 2: /* range */ + for (i = 0; i < cov->count; i++) { + for (gid = cov->range[i].Start; + gid <= cov->range[i].End && gid < num_glyphs; gid++) { + idx = cov->range[i].StartCoverageIndex + gid - cov->range[i].Start; + gid_sub = gid + data->DeltaGlyphID; + count += add_glyph_if_valid(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, gid_sub); + } + } + break; } } else if (subtab->SubstFormat == 2) { struct otl_gsub_single2 *data; + struct clt_coverage *cov; data = (subtab->table).single2; - for (gid = 0; gid < 0x10000; gid++) { - idx = clt_lookup_coverage(&data->coverage, gid); - if (idx >= 0 && - idx < data->GlyphCount) { - fprintf(stdout, "substitute \\%u by \\%u;\n", - (USHORT) gid, (data->Substitute)[idx]); + cov = &data->coverage; + switch (cov->format) { + case 1: /* list */ + for (idx = 0; idx < cov->count; idx++) { + gid = cov->list[idx]; + if (idx >= 0 && idx < data->GlyphCount) { + gid_sub = (data->Substitute)[idx]; + count += add_glyph_if_valid(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, gid_sub); + } } + break; + case 2: /* range */ + for (i = 0; i < cov->count; i++) { + for (gid = cov->range[i].Start; + gid <= cov->range[i].End && gid < num_glyphs; gid++) { + idx = cov->range[i].StartCoverageIndex + gid - cov->range[i].Start; + if (idx >= 0 && idx < data->GlyphCount) { + gid_sub = (data->Substitute)[idx]; + count += add_glyph_if_valid(cmap, used_chars, 
+ map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, gid_sub); + } + } + } + break; } } - return 0; + return count; } -static int -otl_gsub_dump_alternate (struct otl_gsub_subtab *subtab) +static int32_t +add_alternate1_inverse_map (CMap *cmap, char *used_chars, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap, USHORT gid, int idx, + struct otl_gsub_alternate1 *data) { - int gid, idx; + int32_t count = 0; + + if (idx >= 0 && idx < data->AlternateSetCount) { + struct otl_gsub_altset *altset; + USHORT i; + + altset = &(data->AlternateSet[idx]); + if (altset->GlyphCount == 0) + return count; + for (i = 0; i < altset->GlyphCount; i++) { + USHORT gid_alt = altset->Alternate[i]; + count += add_glyph_if_valid(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, gid_alt); + } + } + return count; +} + +static int32_t +add_ToUnicode_alternate (CMap *cmap, char *used_chars, + struct otl_gsub_subtab *subtab, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap) +{ + int32_t count = 0; + USHORT i, gid, idx; ASSERT(subtab); if (subtab->SubstFormat == 1) { struct otl_gsub_alternate1 *data; - + struct clt_coverage *cov; data = subtab->table.alternate1; - for (gid = 0; gid < 0x10000; gid++) { - idx = clt_lookup_coverage(&data->coverage, gid); - if (idx >= 0 && idx < data->AlternateSetCount) { - struct otl_gsub_altset *altset; - USHORT i; - altset = &(data->AlternateSet[idx]); - if (altset->GlyphCount == 0) - continue; - fprintf(stdout, "substitute \\%u from [", (USHORT) gid); - for (i = 0; i < altset->GlyphCount; i++) { - fprintf(stdout, " \\%u", altset->Alternate[i]); + cov = &data->coverage; + switch (cov->format) { + case 1: /* list */ + for (idx = 0; idx < cov->count; idx++) { + gid = cov->list[idx]; + if (gid < num_glyphs) { + count += add_alternate1_inverse_map(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, idx, data); + } + } + break; + case 2: /* range */ + for (i = 0; i 
< cov->count; i++) { + for (gid = cov->range[i].Start; + gid <= cov->range[i].End && gid < num_glyphs; gid++) { + idx = cov->range[i].StartCoverageIndex + gid - cov->range[i].Start; + count += add_alternate1_inverse_map(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, idx, data); } - fprintf(stdout, " ];\n"); } + break; } } + return count; +} - return 0; +static int32_t +add_ligature1_inverse_map (CMap *cmap, char *used_chars, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap, USHORT gid_1, int idx, + struct otl_gsub_ligature1 *data) +{ + int32_t count = 0; + + if (idx >= 0 && idx < data->LigSetCount) { + struct otl_gsub_ligset *ligset; + USHORT i, j; + + ligset = &(data->LigatureSet[idx]); + for (j = 0; j < ligset->LigatureCount; j++) { + USHORT gid_sub = ligset->Ligature[j].LigGlyph; + if (gid_sub < num_glyphs) { + uint16_t cid = GIDToCIDMap[gid_sub]; + if (is_used_char2(used_chars, cid)) { + int32_t ch, *ucv; + USHORT comp_count = ligset->Ligature[j].CompCount; + int fail_count = 0; + + ucv = NEW(comp_count, int32_t); + ch = UC_is_valid(map_base[gid_1]) ? map_base[gid_1] : map_sub[gid_1]; + ucv[0] = ch; + fail_count += UC_is_valid(ch) ? 0 : 1; + for (i = 0; i < ligset->Ligature[j].CompCount - 1; i++) { + USHORT gid = ligset->Ligature[j].Component[i]; + if (gid < num_glyphs) { + ch = UC_is_valid(map_base[gid]) ? map_base[gid] : map_sub[gid]; + ucv[i+1] = ch; + fail_count += UC_is_valid(ch) ? 
0 : 1; + } else { + fail_count += 1; + } + } + if (fail_count == 0) { + unsigned char src[2], *dst; + unsigned char *p, *endptr; + size_t len = 0; + + src[0] = (cid >> 8) & 0xff; + src[1] = cid & 0xff; + dst = NEW(comp_count*4, unsigned char); + p = dst; + endptr = dst + comp_count * 4; + for (i = 0; i < comp_count; i++) { + len += UC_UTF16BE_encode_char(ucv[i], &p, endptr); + } + CMap_add_bfchar(cmap, src, 2, dst, len); + used_chars[cid / 8] &= ~(1 << (7 - (cid % 8))); + count++; + RELEASE(dst); + } + RELEASE(ucv); + } + } + } + } + + return count; } -static int -otl_gsub_dump_ligature (struct otl_gsub_subtab *subtab) +static int32_t +add_ToUnicode_ligature (CMap *cmap, char *used_chars, + struct otl_gsub_subtab *subtab, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap) { - int gid, idx; + int32_t count = 0; + USHORT i, idx, gid; ASSERT(subtab); if (subtab->SubstFormat == 1) { struct otl_gsub_ligature1 *data; + struct clt_coverage *cov; data = subtab->table.ligature1; - for (gid = 0; gid < 0x10000; gid++) { - idx = clt_lookup_coverage(&data->coverage, gid); - if (idx >= 0 && idx < data->LigSetCount) { - struct otl_gsub_ligset *ligset; - USHORT i, j; - ligset = &(data->LigatureSet[idx]); - for (j = 0; j < ligset->LigatureCount; j++) { - fprintf(stdout, "substitute \\%u", (USHORT) gid); - for (i = 0; i < ligset->Ligature[j].CompCount - 1; i++) { - fprintf(stdout, " \\%u", ligset->Ligature[j].Component[i]); + cov = &data->coverage; + switch (cov->format) { + case 1: /* list */ + for (idx = 0; idx < cov->count; idx++) { + gid = cov->list[idx]; + if (gid < num_glyphs) { + count += add_ligature1_inverse_map(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, idx, data); + } + } + break; + case 2: /* range */ + for (i = 0; i < cov->count; i++) { + for (gid = cov->range[i].Start; + gid <= cov->range[i].End && gid < num_glyphs; gid++) { + idx = cov->range[i].StartCoverageIndex + gid - cov->range[i].Start; + if (gid < 
num_glyphs) { + count += add_ligature1_inverse_map(cmap, used_chars, + map_base, map_sub, num_glyphs, + GIDToCIDMap, gid, idx, data); } - fprintf(stdout, " by \\%u;\n", ligset->Ligature[j].LigGlyph); } } + break; } } @@ -1779,48 +1985,44 @@ } int -otl_gsub_dump (otl_gsub *gsub_list, - const char *script, const char *language, const char *feature) +otl_gsub_add_ToUnicode (CMap *cmap, char *used_chars, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap, sfnt *sfont) { - int error = -1; + int count = 0; + otl_gsub *gsub_list; struct otl_gsub_tab *gsub; struct otl_gsub_subtab *subtab; - int sel, i, j; - - if (!gsub_list) - return -1; + int i, j; - sel = gsub_list->select; - error = otl_gsub_select(gsub_list, script, language, feature); - if (error < 0) { - ERROR("GSUB feature %s.%s.%s not found.", script, language, feature); - } - - i = gsub_list->select; - if (i < 0 || i >= gsub_list->num_gsubs) { - ERROR("GSUB not selected..."); - return -1; - } - gsub = &(gsub_list->gsubs[i]); + gsub_list = otl_gsub_new(); + otl_gsub_add_feat(gsub_list, "*", "*", "*", sfont); - for (j = 0; - !error && - j < gsub->num_subtables; j++) { - subtab = &(gsub->subtables[j]); - switch ((int) subtab->LookupType){ - case OTL_GSUB_TYPE_SINGLE: - error = otl_gsub_dump_single(subtab); - break; - case OTL_GSUB_TYPE_ALTERNATE: - error = otl_gsub_dump_alternate(subtab); - break; - case OTL_GSUB_TYPE_LIGATURE: - error = otl_gsub_dump_ligature(subtab); - break; + for (i = 0; i < gsub_list->num_gsubs; i++) { + gsub = &(gsub_list->gsubs[i]); + for (j = 0; j < gsub->num_subtables; j++) { + subtab = &(gsub->subtables[j]); + switch ((int) subtab->LookupType){ + case OTL_GSUB_TYPE_SINGLE: + count += add_ToUnicode_single(cmap, used_chars, subtab, + map_base, map_sub, num_glyphs, + GIDToCIDMap); + break; + case OTL_GSUB_TYPE_ALTERNATE: + count += add_ToUnicode_alternate(cmap, used_chars, subtab, + map_base, map_sub, num_glyphs, + GIDToCIDMap); + break; + case 
OTL_GSUB_TYPE_LIGATURE: + count += add_ToUnicode_ligature(cmap, used_chars, subtab, + map_base, map_sub, num_glyphs, + GIDToCIDMap); + break; + } } } - gsub_list->select = sel; + otl_gsub_release(gsub_list); - return error; + return count; } #endif diff -Naur a/texk/dvipdfm-x/tt_gsub.h b/texk/dvipdfm-x/tt_gsub.h --- a/texk/dvipdfm-x/tt_gsub.h 2018-09-14 04:34:50.000000000 +0100 +++ b/texk/dvipdfm-x/tt_gsub.h 2019-05-31 22:00:04.009964032 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2002-2018 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. This program is free software; you can redistribute it and/or modify @@ -23,6 +23,7 @@ #include "sfnt.h" #include "otl_opt.h" +#include "cmap.h" typedef struct otl_gsub otl_gsub; @@ -59,11 +60,7 @@ extern int otl_gsub_set_chain (otl_gsub *gsub_list, const char *otl_tags); extern int otl_gsub_apply_chain (otl_gsub *gsub_list, USHORT *gid); -#if 0 -extern int otl_gsub_dump (otl_gsub *gsub_list, - const char *script, - const char *language, - const char *feature); -#endif - +extern int otl_gsub_add_ToUnicode (CMap *cmap, char *used_chars, + int32_t *map_base, int32_t *map_sub, USHORT num_glyphs, + uint16_t *GIDToCIDMap, sfnt *sfont); #endif /* _TT_GSUB_H_ */ diff -Naur a/texk/dvipdfm-x/unicode.c b/texk/dvipdfm-x/unicode.c --- a/texk/dvipdfm-x/unicode.c 2016-01-06 10:13:28.000000000 +0000 +++ b/texk/dvipdfm-x/unicode.c 2019-05-31 22:00:04.009964032 +0100 @@ -1,6 +1,6 @@ /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks. - Copyright (C) 2002-2016 by Jin-Hwan Cho and Shunsaku Hirata, + Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata, the dvipdfmx project team. Copyright (C) 1998, 1999 by Mark A. 
Wicks @@ -123,7 +123,7 @@ unsigned char *p = *pp; if (ucv >= 0 && ucv <= 0xFFFF) { - if (p + 2 >= endptr) + if (p + 2 > endptr) return 0; p[0] = (ucv >> 8) & 0xff; p[1] = ucv & 0xff; @@ -131,7 +131,7 @@ } else if (ucv >= 0x010000 && ucv <= 0x10FFFF) { unsigned short high, low; - if (p + 4 >= endptr) + if (p + 4 > endptr) return 0; ucv -= 0x00010000; high = (ucv >> UC_SUR_SHIFT) + UC_SUR_HIGH_START; @@ -142,7 +142,7 @@ p[3] = (low & 0xff); count = 4; } else { - if (p + 2 >= endptr) + if (p + 2 > endptr) return 0; p[0] = (UC_REPLACEMENT_CHAR >> 8) & 0xff; p[1] = (UC_REPLACEMENT_CHAR & 0xff); @@ -207,25 +207,25 @@ return 0; if (ucv < 0x7f) { - if (p >= endptr - 1) + if (p + 1 > endptr) return 0; p[0] = (unsigned char) ucv; count = 1; } else if (ucv <= 0x7ff) { - if (p >= endptr -2) + if (p + 2 > endptr) return 0; p[0] = (unsigned char) (0xc0 | (ucv >> 6)); p[1] = (unsigned char) (0x80 | (ucv & 0x3f)); count = 2; } else if (ucv <= 0xffff) { - if (p >= endptr - 3) + if (p + 3 > endptr) return 0; p[0] = (unsigned char) (0xe0 | (ucv >> 12)); p[1] = (unsigned char) (0x80 | ((ucv >> 6) & 0x3f)); p[2] = (unsigned char) (0x80 | (ucv & 0x3f)); count = 3; } else if (ucv <= 0x1fffff) { - if (p >= endptr - 4) + if (p + 4 > endptr) return 0; p[0] = (unsigned char) (0xf0 | (ucv >> 18)); p[1] = (unsigned char) (0x80 | ((ucv >> 12) & 0x3f)); @@ -233,7 +233,7 @@ p[3] = (unsigned char) (0x80 | (ucv & 0x3f)); count = 4; } else if (ucv <= 0x3ffffff) { - if (p >= endptr - 5) + if (p + 5 > endptr) return 0; p[0] = (unsigned char) (0xf8 | (ucv >> 24)); p[1] = (unsigned char) (0x80 | ((ucv >> 18) & 0x3f)); @@ -242,7 +242,7 @@ p[4] = (unsigned char) (0x80 | (ucv & 0x3f)); count = 5; } else if (ucv <= 0x7fffffff) { - if (p >= endptr - 6) + if (p + 6 > endptr) return 0; p[0] = (unsigned char) (0xfc | (ucv >> 30)); p[1] = (unsigned char) (0x80 | ((ucv >> 24) & 0x3f)); diff -Naur a/texk/dvipsk/ChangeLog b/texk/dvipsk/ChangeLog --- a/texk/dvipsk/ChangeLog 2019-04-07 02:42:55.000000000 +0100 +++ 
b/texk/dvipsk/ChangeLog 2019-05-31 22:00:04.009964032 +0100 @@ -1,3 +1,9 @@ +2019-04-30 Karl Berry + + * dosection.c (dosection): close PostScript string constant + for long filenames. tex-k mail from Arnaud Blouin, + 24 Apr 2019 13:54:10. + 2019-04-07 Karl Berry * TeX Live 2019. diff -Naur a/texk/dvipsk/dosection.c b/texk/dvipsk/dosection.c --- a/texk/dvipsk/dosection.c 2019-03-30 01:50:10.000000000 +0000 +++ b/texk/dvipsk/dosection.c 2019-05-31 22:00:04.009964032 +0100 @@ -23,7 +23,7 @@ int np; int k; integer thispage = 0; - char buf[104]; + char buf[300]; /* really 253 */ dopsfont(s); #ifdef HPS @@ -40,7 +40,9 @@ doubleout(mag); numout((integer)DPI); numout((integer)VDPI); - snprintf(buf, sizeof(buf), "(%.500s)", fulliname); + /* possibly lines in eps files are supposed to be <= 255; + not worth testing the limits merely to output a long file name. */ + snprintf(buf, sizeof(buf), "(%.250s)", fulliname); cmdout(buf); newline(); cmdout("@start"); diff -Naur a/texk/web2c/ptexdir/ChangeLog b/texk/web2c/ptexdir/ChangeLog --- a/texk/web2c/ptexdir/ChangeLog 2019-02-06 11:01:31.000000000 +0000 +++ b/texk/web2c/ptexdir/ChangeLog 2019-05-31 22:00:04.010964033 +0100 @@ -1,3 +1,11 @@ +2019-05-06 Hironori Kitagawa + + * ptex-base.ch: + Make appropriate comparison of U+0100 by \if in upTeX. + https://github.com/texjporg/tex-jp-build/issues/68 + Re-eval kcatcode of Japanese character token in \if and \ifcat. + https://github.com/texjporg/ptex-manual/issues/4 + 2019-02-03 Hironori Kitagawa * ptex-base.ch: Ignore newline char after Japanese control diff -Naur a/texk/web2c/ptexdir/ptex-base.ch b/texk/web2c/ptexdir/ptex-base.ch --- a/texk/web2c/ptexdir/ptex-base.ch 2019-02-06 11:00:54.000000000 +0000 +++ b/texk/web2c/ptexdir/ptex-base.ch 2019-05-31 22:00:04.010964033 +0100 @@ -59,6 +59,7 @@ % (2017-09-07) HK pTeX p3.7.2 More restrictions on direction change commands. % (2018-01-21) HK Added \ptexversion primitive and co. pTeX p3.8. 
% (2018-04-14) HK pTeX p3.8.1 Bug fix for discontinuous KINSOKU table. +% (2019-02-03) HK pTeX p3.8.2 Change \inhibitglue, add \disinhibitglue. % @x @@ -324,6 +325,13 @@ wterm(')'); @z +@x +@d max_halfword==@"FFFFFFF {largest allowable value in a |halfword|} +@y +@d max_halfword==@"FFFFFFF {largest allowable value in a |halfword|} +@d max_cjk_val=@"10000 +@z + @x [8.111] l.2436 - pTeX: check hi/ho (mem_top+sup_main_memory>=max_halfword) then bad:=14; @y @@ -2533,19 +2541,19 @@ end; @y if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then - begin m:=cur_cmd; n:=cur_chr; + begin n:=cur_chr; m:=kcat_code(kcatcodekey(n)); end else if (cur_cmd>active_char)or(cur_chr>255) then - begin m:=relax; n:=256; + begin m:=relax; n:=max_cjk_val; end else begin m:=cur_cmd; n:=cur_chr; end; get_x_token_or_active_char; if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then - begin cur_cmd:=cur_cmd; - end {dummy} + begin cur_cmd:=kcat_code(kcatcodekey(cur_chr)); + end else if (cur_cmd>active_char)or(cur_chr>255) then - begin cur_cmd:=relax; cur_chr:=256; + begin cur_cmd:=relax; cur_chr:=max_cjk_val; end; @z diff -Naur a/texk/web2c/uptexdir/ChangeLog b/texk/web2c/uptexdir/ChangeLog --- a/texk/web2c/uptexdir/ChangeLog 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/ChangeLog 2019-05-31 22:00:04.010964033 +0100 @@ -1,3 +1,28 @@ +2019-05-25 TANAKA Takuji + + * uptex-m.ch: + Correct upTeX_revision ".25", upTeX_version_string "-u1.25". + +2019-05-06 TANAKA Takuji + + * uptex-m.ch, upbibtex.ch, updvitype.ch, uppltotf.ch, uptftopl.ch, + uptex_version.h: upTeX version u1.25. + * kanji.c, kanji.h: + Fix bug of kcatcode at Fullwidth ASCII variants and + Halfwidth Katakana variants from Yusuke Terada san: + https://github.com/texjporg/tex-jp-build/pull/79 + Set default internal encoding EUC/SJIS if a command name is + with prefix of "p" or "ep", intending to be compatible with + pTeX family (ptex, eptex, pbibtex, pdvitype, ppltotf, ptftopl) + (experimental). 
+ +2019-05-06 Hironori Kitagawa + + * uptex-m.ch: + Make appropreate comparison of U+0100 by \if. + https://github.com/texjporg/tex-jp-build/issues/68 + * tests/test_if.tex: Test case. + 2019-02-23 TANAKA Takuji * uptex-m.ch, upbibtex.ch, updvitype.ch, uppltotf.ch, uptftopl.ch, @@ -24,7 +49,7 @@ 2018-09-16 TANAKA Takuji * upbibtex.ch: Fix bug of substring$ - from Takashi Sakai: + from Takashi Sakai san: https://github.com/texjporg/tex-jp-build/issues/64 https://github.com/texjporg/tex-jp-build/pull/66 diff -Naur a/texk/web2c/uptexdir/kanji.c b/texk/web2c/uptexdir/kanji.c --- a/texk/web2c/uptexdir/kanji.c 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/kanji.c 2019-05-31 22:00:04.010964033 +0100 @@ -444,7 +444,7 @@ || (LATIN_SMALL_LETTER_O_WITH_STROKE <=c && c<=LATIN_SMALL_LETTER_Y_WITH_DIAERESIS ) ) return 0x1FD; } - if (block==0xa0) { + if (block==0xa1) { /* Fullwidth ASCII variants except for U+FF01..FF0F, U+FF1A..FF20, U+FF3B..FF40, U+FF5B..FF5E */ if ( (FULLWIDTH_DIGIT_0 <=c && c<=FULLWIDTH_DIGIT_9 ) || (FULLWIDTH_CAPITAL_A<=c && c<=FULLWIDTH_CAPITAL_Z) @@ -485,8 +485,6 @@ { char *p; - enable_UPTEX (true); /* enable */ - init_kanji (file_str, internal_str); p = getenv ("PTEX_KANJI_ENC"); @@ -504,3 +502,33 @@ } #endif } + +void init_default_kanji_select(void) +{ + char *base; + + base = kpse_program_basename (argv[0]); + + if (FILESTRNCASEEQ(base, "p", 1) || FILESTRNCASEEQ(base, "ep", 2)) { + + enable_UPTEX (false); /* disable */ +#if defined(WIN32) +/* pBibTeX is EUC only */ + if (FILESTRNCASEEQ(base, "pbibtex", 7)) { + init_default_kanji(NULL, "euc"); + } else { +/* for pTeX, e-pTeX, pDVItype, pPLtoTF, and pTFtoPL */ + init_default_kanji(NULL, "sjis"); + } +#else + init_default_kanji(NULL, "euc"); +#endif + + } else { + +/* for upTeX, e-upTeX, upBibTeX, upDVItype, upPLtoTF, and upTFtoPL */ + enable_UPTEX (true); /* enable */ + init_default_kanji ("utf8", "uptex"); + + } +} diff -Naur a/texk/web2c/uptexdir/kanji.h b/texk/web2c/uptexdir/kanji.h --- 
a/texk/web2c/uptexdir/kanji.h 2019-02-06 11:01:31.000000000 +0000 +++ b/texk/web2c/uptexdir/kanji.h 2019-05-31 22:00:04.010964033 +0100 @@ -38,8 +38,9 @@ extern integer multilenbuffchar (integer c); extern void init_default_kanji (const_string file_str, const_string internal_str); +extern void init_default_kanji_select (void); /* for upTeX, e-upTeX, upBibTeX, upDVItype, upPLtoTF, and upTFtoPL */ -#define initkanji() init_default_kanji("utf8", "uptex") +#define initkanji() init_default_kanji_select() /* for upDVItype */ #define setpriorfileenc() set_prior_file_enc() diff -Naur a/texk/web2c/uptexdir/tests/test_if.tex b/texk/web2c/uptexdir/tests/test_if.tex --- a/texk/web2c/uptexdir/tests/test_if.tex 1970-01-01 01:00:00.000000000 +0100 +++ b/texk/web2c/uptexdir/tests/test_if.tex 2019-05-31 22:00:04.010964033 +0100 @@ -0,0 +1,29 @@ +\kcatcode`あ=18 +\def\xA{あ}\let\yA=あ +\kcatcode`あ=17 +\def\xB{あ}\let\yB=あ +\kcatcode`あ=16 + +\message{\ifcat あ\xA Y\else N\fi} +\message{\ifcat あ\yA Y\else N\fi} +\message{\ifcat あ\xB Y\else N\fi} +\message{\ifcat あ\yB Y\else N\fi} + +\message{\if あ\xA Y\else N\fi} +\message{\if あ\yA Y\else N\fi} +\message{\if い\xA Y\else N\fi} +\message{\if い\yA Y\else N\fi} + +\ifx\ucs\undefined\else + \kcatcode"100=16 + \message{upTeX} + \def\xA{Ā}% U+0100 + \def\xB{ā}% U+0101 + \message{\if \xA\relax Y\else N\fi} + \message{\if \xB\relax Y\else N\fi} + \message{\ifcat\xA\relax Y\else N\fi} + \message{\ifcat\xB\relax Y\else N\fi} +\fi +\end + + diff -Naur a/texk/web2c/uptexdir/upbibtex.ch b/texk/web2c/uptexdir/upbibtex.ch --- a/texk/web2c/uptexdir/upbibtex.ch 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/upbibtex.ch 2019-05-31 22:00:04.010964033 +0100 @@ -3,7 +3,7 @@ @d banner=='This is pBibTeX, Version 0.99d-j0.33' @y @d my_name=='upbibtex' -@d banner=='This is upBibTeX, Version 0.99d-j0.33-u1.24' +@d banner=='This is upBibTeX, Version 0.99d-j0.33-u1.25' @z @x diff -Naur a/texk/web2c/uptexdir/updvitype.ch 
b/texk/web2c/uptexdir/updvitype.ch --- a/texk/web2c/uptexdir/updvitype.ch 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/updvitype.ch 2019-05-31 22:00:04.010964033 +0100 @@ -3,7 +3,7 @@ @d banner=='This is pDVItype, Version 3.6-p0.4' @y @d my_name=='updvitype' -@d banner=='This is upDVItype, Version 3.6-p0.4-u1.24' +@d banner=='This is upDVItype, Version 3.6-p0.4-u1.25' @z @x procedure initialize diff -Naur a/texk/web2c/uptexdir/uppltotf.ch b/texk/web2c/uptexdir/uppltotf.ch --- a/texk/web2c/uptexdir/uppltotf.ch 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/uppltotf.ch 2019-05-31 22:00:04.010964033 +0100 @@ -3,7 +3,7 @@ @d banner=='This is pPLtoTF, Version 3.6-p2.0' @y @d my_name=='uppltotf' -@d banner=='This is upPLtoTF, Version 3.6-p2.0-u1.24' +@d banner=='This is upPLtoTF, Version 3.6-p2.0-u1.25' @z @x diff -Naur a/texk/web2c/uptexdir/uptex-m.ch b/texk/web2c/uptexdir/uptex-m.ch --- a/texk/web2c/uptexdir/uptex-m.ch 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/uptex-m.ch 2019-05-31 22:00:04.010964033 +0100 @@ -1,4 +1,4 @@ -% This is a change file for upTeX u1.24 +% This is a change file for upTeX u1.25 % By Takuji Tanaka. % % (02/26/2007) TTK upTeX u0.01 @@ -39,6 +39,8 @@ % (2018-01-21) HK Added \uptexversion primitive and co. % (2018-02-24) TTK upTeX u1.23 % (2019-02-23) TTK upTeX u1.24 +% (2019-05-06) HK Hironori Kitagawa fixed a bug in \if. 
+% (2019-05-06) TTK upTeX u1.25 @x upTeX: banner {printed when \pTeX\ starts} @@ -46,8 +48,8 @@ {printed when \pTeX\ starts} @# @d upTeX_version=1 -@d upTeX_revision==".24" -@d upTeX_version_string=='-u1.24' {current u\pTeX\ version} +@d upTeX_revision==".25" +@d upTeX_version_string=='-u1.25' {current u\pTeX\ version} @# @d upTeX_banner=='This is upTeX, Version 3.14159265',pTeX_version_string,upTeX_version_string @d upTeX_banner_k==upTeX_banner @@ -142,6 +144,7 @@ @d max_quarterword=255 {largest allowable value in a |quarterword|} @d min_halfword==-@"FFFFFFF {smallest allowable value in a |halfword|} @d max_halfword==@"FFFFFFF {largest allowable value in a |halfword|} +@d max_cjk_val=@"10000 @y @d min_quarterword=0 {smallest allowable value in a |quarterword|} @d max_quarterword=@"FFFF {largest allowable value in a |quarterword|} @@ -699,16 +702,24 @@ @x if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then + begin n:=cur_chr; m:=kcat_code(kcatcodekey(n)); + end @y if (cur_cmd>=kanji)and(cur_cmd<=hangul) then + begin m:=cur_cmd; n:=cur_chr; + end @z @x get_x_token_or_active_char; if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then + begin cur_cmd:=kcat_code(kcatcodekey(cur_chr)); + end @y get_x_token_or_active_char; if (cur_cmd>=kanji)and(cur_cmd<=hangul) then + begin cur_cmd:=cur_cmd; + end {dummy} @z @x diff -Naur a/texk/web2c/uptexdir/uptex_version.h b/texk/web2c/uptexdir/uptex_version.h --- a/texk/web2c/uptexdir/uptex_version.h 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/uptex_version.h 2019-05-31 22:00:04.010964033 +0100 @@ -1 +1 @@ -#define UPTEX_VERSION "u1.24" +#define UPTEX_VERSION "u1.25" diff -Naur a/texk/web2c/uptexdir/uptftopl.ch b/texk/web2c/uptexdir/uptftopl.ch --- a/texk/web2c/uptexdir/uptftopl.ch 2019-02-23 01:59:36.000000000 +0000 +++ b/texk/web2c/uptexdir/uptftopl.ch 2019-05-31 22:00:04.010964033 +0100 @@ -3,7 +3,7 @@ @d banner=='This is pTFtoPL, Version 3.3-p2.0' @y @d my_name=='uptftopl' -@d banner=='This 
is upTFtoPL, Version 3.3-p2.0-u1.24' +@d banner=='This is upTFtoPL, Version 3.3-p2.0-u1.25' @z @x diff -Naur a/texk/web2c/xetexdir/ChangeLog b/texk/web2c/xetexdir/ChangeLog --- a/texk/web2c/xetexdir/ChangeLog 2019-01-02 22:41:45.000000000 +0000 +++ b/texk/web2c/xetexdir/ChangeLog 2019-05-31 22:02:30.345042172 +0100 @@ -1,3 +1,8 @@ +2019-05-30 Khaled Hosny + + * XeTeXLayoutInterface.cpp: Do not use hb-icu if HarfBuzz + version is 2.5.0 or newer. + 2019-01-03 Akira Kakuto * NEWS, xetex_version.h, xetex.web: Sync with the upstream. diff -Naur a/texk/web2c/xetexdir/XeTeXLayoutInterface.cpp b/texk/web2c/xetexdir/XeTeXLayoutInterface.cpp --- a/texk/web2c/xetexdir/XeTeXLayoutInterface.cpp 2017-03-12 08:47:36.000000000 +0000 +++ b/texk/web2c/xetexdir/XeTeXLayoutInterface.cpp 2019-05-31 22:05:06.636170781 +0100 @@ -2,7 +2,7 @@ Part of the XeTeX typesetting system Copyright (c) 1994-2008 by SIL International Copyright (c) 2009-2012 by Jonathan Kew - Copyright (c) 2012-2015 by Khaled Hosny + Copyright (c) 2012-2019 by Khaled Hosny SIL Author(s): Jonathan Kew @@ -39,8 +39,11 @@ #include #include +#include #include +#if !HB_VERSION_ATLEAST(2,5,0) #include +#endif #include #include "XeTeX_web.h" @@ -661,6 +664,7 @@ free(engine->shaper); } +#if !HB_VERSION_ATLEAST(2,5,0) static unsigned int _decompose_compat(hb_unicode_funcs_t* ufuncs, hb_codepoint_t u, @@ -677,8 +681,7 @@ hb_unicode_funcs_set_decompose_compatibility_func(ufuncs, _decompose_compat, NULL, NULL); return ufuncs; } - -static hb_unicode_funcs_t* hbUnicodeFuncs = NULL; +#endif int layoutChars(XeTeXLayoutEngine engine, uint16_t chars[], int32_t offset, int32_t count, int32_t max, @@ -699,11 +702,15 @@ script = hb_ot_tag_to_script (engine->script); + hb_buffer_reset(engine->hbBuffer); + +#if !HB_VERSION_ATLEAST(2,5,0) + static hb_unicode_funcs_t* hbUnicodeFuncs = NULL; if (hbUnicodeFuncs == NULL) hbUnicodeFuncs = _get_unicode_funcs(); - - hb_buffer_reset(engine->hbBuffer); hb_buffer_set_unicode_funcs(engine->hbBuffer, 
hbUnicodeFuncs); +#endif + hb_buffer_add_utf16(engine->hbBuffer, chars, max, offset, count); hb_buffer_set_direction(engine->hbBuffer, direction); hb_buffer_set_script(engine->hbBuffer, script);