looper/subprojects/mpg123/src/local.c

	/*
	local: some stuff for localisation, safe terminal printout

	This is about determining if we got UTF-8 locale and
	checking output terminal properties, along with subsequent string
	transformations for safe printout.

	copyright 2008-2021 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Thomas Orgis, based on a patch by Thorsten Glaser.
*/

// wchar stuff
#define _XOPEN_SOURCE 600
#define _POSIX_C_SOURCE 200112L

#include "config.h"

#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#include "compat/compat.h"

#include "local.h"

#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif

#ifdef WIN32
#define WIN32_LEAN_AND_MEAN 1
#include <windows.h>
#include <wincon.h>
#endif

#include "common/debug.h"

int utf8force = 0; // enforce UTF-8 workings
int utf8env = 0; // produce UTF-8 text output
int utf8loc = 0; // have actual UTF-8 locale (so that mbstowcs() works)

//static int term_is_fun = -1;

static const char joker_symbol = '?';
static const char *uni_repl = "\xef\xbf\xbd";
static const int uni_repl_len = 3;

/* Check some language variable for UTF-8-ness. */
static int is_utf8(const char *lang);

/* Determine the text output mode and store it in the global flags:
   utf8env is set when output should be UTF-8 text, utf8loc is set when a
   locale that identifies itself as UTF-8 is in effect.
   With utf8force set, utf8env is switched on unconditionally and, if the
   environment's locale is not UTF-8, some fallback locale names are tried. */
void check_locale(void)
{
	if(!utf8force)
	{
		/* Check for env vars in proper order: the first one that is set
		   decides, whether it mentions UTF-8 or not. */
		const char *lang = getenv("LC_ALL");
		if(lang == NULL)
			lang = getenv("LC_CTYPE");
		if(lang == NULL)
			lang = getenv("LANG");

		if(is_utf8(lang))
			utf8env = 1;
	}
	else
		utf8env = 1; /* Enforced, no need to ask the environment. */

#if defined(HAVE_SETLOCALE) && defined(LC_CTYPE)
	/* To query, we need to set from environment... */
	int have_utf8_locale = is_utf8(setlocale(LC_CTYPE, ""));
	if(!have_utf8_locale && utf8force)
	{
		/* If enforced, try to set an UTF-8 locale that hopefully exists. */
		have_utf8_locale = is_utf8(setlocale(LC_CTYPE, "C.UTF-8"));
		if(!have_utf8_locale)
			have_utf8_locale = is_utf8(setlocale(LC_CTYPE, "en_US.UTF-8"));
	}
	if(have_utf8_locale)
	{
		utf8env = 1;
		utf8loc = 1;
	}
#endif
#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
	/* The codeset query comes after the setlocale() attempts above, so it
	   reflects whatever locale they left active. */
	const char *codeset = nl_langinfo(CODESET);
	if(is_utf8(codeset))
	{
		utf8env = 1;
		utf8loc = 1;
	}
#endif

	debug2("UTF-8 env %i: locale: %i", utf8env, utf8loc);
}

/* Return 1 if the given locale/codeset string mentions UTF-8 anywhere, in
   one of the four spellings checked below (matching is case-sensitive per
   spelling), 0 otherwise. A NULL string counts as not UTF-8. */
static int is_utf8(const char *lang)
{
	static const char *const spellings[] = { "UTF-8", "utf-8", "UTF8", "utf8" };

	if(!lang)
		return 0;

	for(size_t i=0; i<sizeof(spellings)/sizeof(spellings[0]); ++i)
	{
		if(strstr(lang, spellings[i]) != NULL)
			return 1;
	}
	return 0;
}

// Moved encoding stuff over from metaprint.c and removed references to libmpg123,
// meaning no mpg123_string for you!

// Turn text in some unknown 8-bit encoding into valid UTF-8.
// dest:   address of the output buffer pointer, resized via
//         INT123_safer_realloc()
// source: input bytes; processing stops at the first zero byte
// len:    maximum number of input bytes to look at, negative for using
//         strlen(source)
// return: 0 on success, -1 on error (NULL arguments or failed allocation)
int unknown2utf8(char **dest, const char *source, int len)
{
	if(dest == NULL)
		return -1;
	if(source == NULL)
	{
		*dest = INT123_safer_realloc(*dest, 0);
		return -1;
	}

	const unsigned char *in = (const unsigned char*)source;
	size_t limit = len < 0 ? strlen(source) : (size_t)len;

	// Testing for valid UTF-8 is futile with some unspecified legacy
	// encoding. Bytes in the 7-bit range (C0 controls included) are kept,
	// everything above is replaced with the Unicode replacement character,
	// as we cannot know which symbols a custom encoding placed there.
	// First pass: effective input length and required output size.
	size_t used = 0;
	size_t need = 1; // trailing zero
	while(used < limit && in[used])
	{
		need += in[used] >= 0x80 ? uni_repl_len : 1;
		++used;
	}

	*dest = INT123_safer_realloc(*dest, need);
	if(*dest == NULL)
		return -1;

	// Second pass: produce the output.
	unsigned char *out = (unsigned char*)*dest;
	for(size_t i=0; i<used; ++i)
	{
		if(in[i] < 0x80)
			*out++ = in[i];
		else
		{
			memcpy(out, uni_repl, (size_t)uni_repl_len);
			out += uni_repl_len;
		}
	}
	*out = 0;
	return 0;
}

// Whitespace folding helper for byte-wise filters.
// The first of a run of form feed, carriage return, line feed, tab or
// vertical tab is turned into a plain space; following ones are left as
// they are, so that a subsequent check for bytes below 0x20 drops them.
// *wasspace carries the state between calls: set to 1 after one of these
// characters, reset to 0 by anything else (a plain space included).
static void ascii_space(unsigned char *c, int *wasspace)
{
	const unsigned char ch = *c;
	const int funky =
		ch == '\f' || ch == '\r' || ch == '\n' || ch == '\t' || ch == '\v';

	if(!funky)
	{
		*wasspace = 0;
		return;
	}
	if(!*wasspace)
		*c = ' ';
	*wasspace = 1;
}

// Filter C1 control chars, using c2lead state.
// In UTF-8, the C1 controls are the two-byte sequences 0xc2 0x80 to
// 0xc2 0x9f, so a 0xc2 byte is held back until the next byte is seen.
// Usage constraints:
// - Must be expanded directly inside a loop body, as it uses continue.
// - An int variable named c2lead has to be in scope, initialised to 0
//   before the loop.
// - c is evaluated several times, so it should be free of side effects.
// - append is a statement (without trailing semicolon) that accounts for
//   a held-back 0xc2 that turned out not to start a C1 control.
// Behaviour per byte:
// - Previous byte was 0xc2 and c is 0x80 to 0x9f: drop both, continue.
// - Previous byte was 0xc2 and c is something else: execute append,
//   then treat c normally.
// - c is 0xc2: remember it in c2lead and continue without output.
#define ASCII_C1(c, append) \
	if(c2lead) \
	{ \
		if((c) >= 0x80 && (c) <= 0x9f) \
		{ \
			c2lead = 0; \
			continue; \
		} \
		else \
		{ \
			append; \
		} \
	} \
	c2lead = ((c) == 0xc2); \
	if(c2lead) \
		continue;

// Reduce UTF-8 text to 7-bit ASCII in a buffer managed via
// INT123_safer_realloc().
// dest_:         address of the output buffer pointer
// source:        zero-terminated input
// keep_nonprint: if zero, whitespace runs are folded by ascii_space() and
//                bytes below 0x20 as well as 0x7f are dropped; if non-zero,
//                those 7-bit bytes are kept
// In both modes, UTF-8 continuation bytes are dropped, C1 control sequences
// (0xc2 0x80 to 0xc2 0x9f) are dropped, and any other byte with the high bit
// set (a sequence lead byte) is replaced by joker_symbol.
// return: strlen+1 of result, 0 on error
// If ret is 0, *dest_ will be NULL (the buffer is handed to
// INT123_safer_realloc() with size 0, which presumably frees it -- confirm
// against its definition).

static size_t utf8_ascii_work(char **dest_, const char *source
,	int keep_nonprint)
{
	if(!dest_)
		return 0;
	if(!source)
	{
		*dest_ = INT123_safer_realloc(*dest_, 0);
		return 0;
	}

	char *dest = *dest_;
	// Both loops include the terminating zero of the source.
	size_t source_fill = strlen(source)+1;
	size_t spos = 0;
	size_t dlen = 1; // At least a zero.
	unsigned char *p;

	// Find length of ASCII string (count non-continuation bytes).
	// Do _not_ change this to mpg123_strlen()!
	// It needs to match the loop below.
	// No UTF-8 continuation byte (binary 10xxxxxx), nor control char
	// (the latter only if keep_nonprint is zero).
#define ASCII_PRINT_SOMETHING(c) \
	(((c) & 0xc0) != 0x80 && (keep_nonprint || ((c) != 0x7f && (c) >= 0x20)))
	int c2lead = 0;
	int wasspace = 0;
	// First pass: only count the bytes the second pass is going to write.
	for(spos=0; spos < source_fill; ++spos)
	{
		unsigned char c = ((unsigned char*)source)[spos];
		if(!keep_nonprint)
			ascii_space(&c, &wasspace);
		// A held-back 0xc2 that does not start a C1 control costs one byte.
		ASCII_C1(c, ++dlen);
		if(ASCII_PRINT_SOMETHING(c))
			++dlen;
	}
	// Do nothing with nothing or if allocation fails. Neatly catches overflow
	// of ++dlen.
	if(!dlen || !(dest=INT123_safer_realloc(dest, dlen)))
		goto utf8_ascii_bad;
	p = (unsigned char*)dest;
	// Reset the filter state so that the second pass decides exactly like
	// the first one did.
	c2lead = 0;
	wasspace = 0;
	// Second pass: write the filtered bytes.
	for(spos=0; spos < source_fill; ++spos)
	{
		unsigned char c = ((unsigned char*)source)[spos];
		if(!keep_nonprint)
			ascii_space(&c, &wasspace);
		// A held-back 0xc2 that does not start a C1 control becomes a joker.
		ASCII_C1(c, *p++ = joker_symbol)
		if(!ASCII_PRINT_SOMETHING(c))
			continue;
		else if(c & 0x80) // UTF-8 lead byte (binary 11xxxxxx)
			c = joker_symbol;
		*p++ = c;
	}
#undef ASCII_PRINT_SOMETHING
	// Always close the string.
	if(dlen)
		dest[dlen-1] = 0;
	goto utf8_ascii_end;
utf8_ascii_bad:
	dest = INT123_safer_realloc(dest, 0);
utf8_ascii_end:
	// Hand the (possibly moved or released) buffer back to the caller.
	*dest_ = dest;
	return dest ? strlen(dest)+1 : 0;
}

// Printable-only variant of the ASCII reduction: calls utf8_ascii_work()
// with keep_nonprint switched off. That drops everything below 0x20
// (space), line breaks included, as well as 0x7f (DEL) and the C1 chars.
// The C0 and C1 chars should just be dropped, not rendered. Or should they?
// return: whatever utf8_ascii_work() returns (strlen+1 of result, 0 on error)
static size_t utf8_ascii_print(char **dest, const char *source)
{
	const int keep_nonprint = 0;
	return utf8_ascii_work(dest, source, keep_nonprint);
}

// Reduce UTF-8 data to 7-bit ASCII via utf8_ascii_work() with keep_nonprint
// switched on, so that non-printable and control chars in the 7 bit realm
// are kept.
// return: whatever utf8_ascii_work() returns (strlen+1 of result, 0 on error)
static size_t utf8_ascii(char **dest, const char *source)
{
	const int keep_nonprint = 1;
	return utf8_ascii_work(dest, source, keep_nonprint);
}

// Prepare UTF-8 text for output, depending on environment and target.
//
// dest_:       address of the output buffer pointer; the buffer is resized
//              via INT123_safer_realloc(), so *dest_ has to be NULL or
//              something that may be passed to it
// source:      zero-terminated UTF-8 input
// to_terminal: non-zero if the output goes to a terminal and hence control
//              characters need to be filtered
//
// Modes of operation:
// - UTF-8 env, UTF-8 locale, terminal (with wide char functions available):
//   convert to wide chars, fold whitespace, drop non-printing characters,
//   compute the width using wcswidth().
// - UTF-8 env, terminal: byte-wise filter of C0/C1 control characters and
//   DEL, whitespace folding; width is the count of non-continuation bytes.
// - UTF-8 env, no terminal: plain copy; width is the count of
//   non-continuation bytes.
// - No UTF-8 env: reduction to 7-bit ASCII, dropping non-printable
//   characters only for a terminal; width is the length of the result.
//
// return: width of the result as described above; 0 on error, with *dest_
//         set to NULL (an empty result also has width 0, but a non-NULL
//         *dest_)
size_t utf8outstr(char **dest_, const char *source, int to_terminal)
{
	if(!dest_)
		return 0;
	if(!source)
	{
		*dest_ = INT123_safer_realloc(*dest_, 0);
		return 0;
	}
	char *dest = *dest_;
	size_t width = 0;
	size_t source_fill = strlen(source)+1;

	if(utf8env)
	{
#if defined(HAVE_MBSTOWCS) && defined(HAVE_WCSWIDTH) && \
    defined(HAVE_ISWPRINT) && defined(HAVE_WCSTOMBS)
		if(utf8loc && to_terminal)
		{
			// Best case scenario: Convert to wide string, filter,
			// compute printing width.
			size_t wcharlen = mbstowcs(NULL, source, 0);
			if(wcharlen == (size_t)-1)
				goto utf8outstr_bad;
			// Guard the size computation for the allocations below.
			if(wcharlen+1 > SIZE_MAX/sizeof(wchar_t))
				goto utf8outstr_bad;
			wchar_t *pre = malloc(sizeof(wchar_t)*(wcharlen+1));
			wchar_t *flt = malloc(sizeof(wchar_t)*(wcharlen+1));
			if(!pre || !flt)
			{
				free(flt);
				free(pre);
				goto utf8outstr_bad;
			}
			if(mbstowcs(pre, source, wcharlen+1) == wcharlen)
			{
				size_t nwl = 0;
				int wasspace = 0;
				for(size_t i=0;  i<wcharlen; ++i)
				{
					// Turn any funky space sequence (including line breaks) into
					// one normal space.
					if(iswspace(pre[i]) && pre[i] != ' ')
					{
						if(!wasspace)
							flt[nwl++] = ' ';
						wasspace = 1;
					} else // Anything non-printing is skipped.
					{
						if(iswprint(pre[i]))
							flt[nwl++] = pre[i];
						wasspace = 0;
					}
				}
				flt[nwl] = 0;
				int columns = wcswidth(flt, nwl);
				size_t bytelen = wcstombs(NULL, flt, 0);
				if(
					columns >= 0 && bytelen != (size_t)-1
					&& (dest=INT123_safer_realloc(dest, bytelen+1))
					&& wcstombs(dest, flt, bytelen+1) == bytelen
				){
					width = columns;
				}
				else
					dest=INT123_safer_realloc(dest, 0);
			}
			else // Conversion failed, do not hand back stale buffer contents.
				dest=INT123_safer_realloc(dest, 0);
			free(flt);
			free(pre);
		}
		else
#endif
		if(to_terminal)
		{
			// Only filter C0 and C1 control characters.
			// That is, 0x01 to 0x1f (keeping 0x20, space) and 0x7f (DEL) to 0x9f.
			// Since the input and output is UTF-8, we'll keep that intact.
			// C1 is mapped to 0xc280 till 0xc29f.
			// The output never gets longer than the input, so source_fill
			// bytes are enough.
			dest = INT123_safer_realloc(dest, source_fill);
			if(!dest)
				goto utf8outstr_bad;
			size_t dest_fill = 0;
			int c2lead = 0;
			int wasspace = 0;
			unsigned char *p = (unsigned char*)dest;
			for(size_t i=0; i<source_fill; ++i)
			{
				unsigned char c = ((unsigned char*)source)[i];
				ascii_space(&c, &wasspace);
				// A held-back 0xc2 that is not part of a C1 control is
				// written out again.
				ASCII_C1(c, *p++ = 0xc2)
				if(c && c < 0x20)
					continue; // no C0 control chars, except space
				if(c == 0x7f)
					continue; // also no DEL
				*p++ = c;
				if(!c)
					break; // Up to zero is enough.
				// Assume each 7 bit char and each sequence start make one character.
				// So only continuation bytes need to be ignored.
				if((c & 0xc0) != 0x80)
					++width;
			}
			// Make damn sure that it ends.
			dest_fill = (char*)p - dest;
			dest[dest_fill-1] = 0;
		} else
		{
			// Not a terminal: pass the UTF-8 text through unchanged.
			dest = INT123_safer_realloc(dest, source_fill);
			if(!dest)
				goto utf8outstr_bad;
			size_t dest_fill = 0;
			unsigned char *p = (unsigned char*)dest;
			for(size_t i=0; i<source_fill; ++i)
			{
				unsigned char c = ((unsigned char*)source)[i];
				*p++ = c;
				if(!c)
					break; // Up to zero is enough.
				// Actual width should not matter that much for non-terminal,
				// as we should use less formatting in that case, but anyway.
				if((c & 0xc0) != 0x80)
					++width;
			}
			dest_fill = (char*)p - dest;
			dest[dest_fill-1] = 0;
		}
	} else
	{
		// Last resort: just 7-bit ASCII.
		// This has to cover the non-terminal case, too, otherwise no
		// output would be produced for it at all.
		width = to_terminal
		?	utf8_ascii_print(&dest, source)
		:	utf8_ascii(&dest, source);
		if(!width)
			goto utf8outstr_bad;
		--width; // The helpers return strlen+1.
	}

	goto utf8outstr_end;
utf8outstr_bad:
	dest = INT123_safer_realloc(dest, 0);
	width = 0;
utf8outstr_end:
	*dest_ = dest;
	return width;
}

#undef ASCII_C1

// I tried saving some malloc using provided work buffers, but
// realized that the path of Unicode transformations is so full
// of them regardless.
// Can this include all the necessary logic?
// - If UTF-8 input: Use utf8outstr(), which includes terminal switch.
// - If not:
// -- If terminal: construct safe UTF-8, pass on to utf8outstr().
// -- If not: assume env encoding, unprocessed string that came
//    from the environment.

// Produce a string that is safe to print from text of given encoding.
//
// dest:    address of the output buffer pointer; it has to be NULL or a
//          buffer that may be handed to INT123_safer_realloc()/free()
// str:     zero-terminated input text
// is_utf8: non-zero if str is known to be UTF-8; otherwise it is treated as
//          UTF-8 only if the environment is (utf8env)
// is_term: non-zero if the output is intended for a terminal
//
// - UTF-8 input: processed by utf8outstr(), which includes the terminal
//   switch.
// - Other input for a terminal: converted by unknown2utf8() first, then
//   passed through utf8outstr().
// - Other input, no terminal: duplicated without processing.
//
// return: 0 on success, -1 on error; on error, *dest is NULL
int outstr(char **dest, const char *str, int is_utf8, int is_term)
{
	if(!dest)
		return -1;
	if(!str)
	{
		*dest = INT123_safer_realloc(*dest, 0);
		return -1;
	}
	int ret = 0;
	if(is_utf8 || utf8env)
	{
		utf8outstr(dest, str, is_term);
		// A missing buffer is the failure indicator, the returned width
		// can be zero for a valid empty string.
		if(!*dest)
			ret = -1;
	} else if(is_term)
	{
		char *usrc = NULL;
		ret = unknown2utf8(&usrc, str, -1);
		if(!ret)
		{
			utf8outstr(dest, usrc, is_term);
			if(!*dest)
				ret = -1;
		}
		else // Do not leave stale content of an earlier call behind.
			*dest = INT123_safer_realloc(*dest, 0);
		free(usrc);
	} else
	{
		// Duplicate before dropping the old buffer, so that nothing leaks
		// and a failed duplication still leaves *dest NULL.
		char *copy = INT123_compat_strdup(str);
		free(*dest);
		*dest = copy;
		if(!copy)
			ret = -1;
	}
	return ret;
}

// Filter a string using outstr() and print the result to the given stream.
// out:     stream to print to
// str:     zero-terminated text to print
// is_utf8: passed on to outstr()
// is_term: passed on to outstr()
// return: -1 if str is NULL; the return value of outstr() if that did not
//         provide an output buffer; otherwise the return value of fprintf()
int print_outstr(FILE *out, const char *str, int is_utf8, int is_term)
{
	if(str == NULL)
		return -1;

	char *filtered = NULL;
	int status = outstr(&filtered, str, is_utf8, is_term);
	if(filtered == NULL)
		return status;

	status = fprintf(out, "%s", filtered);
	free(filtered);
	return status;
}
Make it work 2024-09-28 10:31:06 -07:00			`/*`
			`local: some stuff for localisation, safe terminal printout`

			`This is about determining if we got UTF-8 locale and`
			`checking output terminal properties, along with subsequent string`
			`transformations for safe printout.`

			`copyright 2008-2021 by the mpg123 project - free software under the terms of the LGPL 2.1`
			`see COPYING and AUTHORS files in distribution or http://mpg123.org`
			`initially written by Thomas Orgis, based on a patch by Thorsten Glaser.`
			`*/`

			`// wchar stuff`
			`#define _XOPEN_SOURCE 600`
			`#define _POSIX_C_SOURCE 200112L`

			`#include "config.h"`

			`#ifdef HAVE_LOCALE_H`
			`#include <locale.h>`
			`#endif`
			`#ifdef HAVE_LANGINFO_H`
			`#include <langinfo.h>`
			`#endif`
			`#include "compat/compat.h"`

			`#include "local.h"`

			`#ifdef HAVE_WCHAR_H`
			`#include <wchar.h>`
			`#endif`
			`#ifdef HAVE_WCTYPE_H`
			`#include <wctype.h>`
			`#endif`

			`#ifdef WIN32`
			`#define WIN32_LEAN_AND_MEAN 1`
			`#include <windows.h>`
			`#include <wincon.h>`
			`#endif`

			`#include "common/debug.h"`

			`int utf8force = 0; // enforce UTF-8 workings`
			`int utf8env = 0; // produce UTF-8 text output`
			`int utf8loc = 0; // have actual UTF-8 locale (so that mbstowcs() works)`

			`//static int term_is_fun = -1;`

			`static const char joker_symbol = '?';`
			`static const char *uni_repl = "\xef\xbf\xbd";`
			`static const int uni_repl_len = 3;`

			`/* Check some language variable for UTF-8-ness. */`
			`static int is_utf8(const char *lang);`

			`void check_locale(void)`
			`{`
			`if(utf8force)`
			`utf8env = 1;`
			`else`
			`{`
			`const char *cp;`

			`/* Check for env vars in proper oder. */`
			`if((cp = getenv("LC_ALL")) == NULL && (cp = getenv("LC_CTYPE")) == NULL)`
			`cp = getenv("LANG");`

			`if(is_utf8(cp))`
			`utf8env = 1;`
			`}`

			`#if defined(HAVE_SETLOCALE) && defined(LC_CTYPE)`
			`/* To query, we need to set from environment... */`
			`if(`
			`is_utf8(setlocale(LC_CTYPE, ""))`
			`// If enforced, try to set an UTF-8 locale that hopefully exists.`
			`\|\| (utf8force && is_utf8(setlocale(LC_CTYPE, "C.UTF-8")))`
			`\|\| (utf8force && is_utf8(setlocale(LC_CTYPE, "en_US.UTF-8")))`
			`)`
			`{`
			`utf8env = 1;`
			`utf8loc = 1;`
			`}`
			`#endif`
			`#if defined(HAVE_NL_LANGINFO) && defined(CODESET)`
			`/* ...langinfo works after we set a locale, eh? So it makes sense after setlocale, if only. */`
			`if(is_utf8(nl_langinfo(CODESET)))`
			`{`
			`utf8env = 1;`
			`utf8loc = 1;`
			`}`
			`#endif`

			`debug2("UTF-8 env %i: locale: %i", utf8env, utf8loc);`
			`}`

			`static int is_utf8(const char *lang)`
			`{`
			`if(lang == NULL) return 0;`

			`/* Now, if the variable mentions UTF-8 anywhere, in some variation, the locale is UTF-8. */`
			`if( strstr(lang, "UTF-8") \|\| strstr(lang, "utf-8")`
			`\|\| strstr(lang, "UTF8") \|\| strstr(lang, "utf8") )`
			`return 1;`
			`else`
			`return 0;`
			`}`

			`// Moved encoding stuff over from metaprint.c and removed references to libmpg123,`
			`// meaning no mpg123_string for you!`

			`int unknown2utf8(char *dest, const char source, int len)`
			`{`
			`if(!dest)`
			`return -1;`
			`if(!source)`
			`{`
			`dest = INT123_safer_realloc(dest, 0);`
			`return -1;`
			`}`
			`size_t count = len < 0 ? strlen(source) : (size_t)len;`
			`// Make a somewhat proper UTF-8 string out of this. Testing for valid`
			`// UTF-8 is futile. It will be some unspecified legacy 8-bit encoding.`
			`// I am keeping C0 chars, but replace everything above 7 bits with`
			`// the Unicode replacement character as most custom 8-bit encodings`
			`// placed some symbols into the C1 range, we just don't know which.`
			`size_t ulen = 1; // trailing zero`
			`for(size_t i=0; i<count; ++i)`
			`{`
			`unsigned char c = ((unsigned char*)source)[i];`
			`if(!c)`
			`break;`
			`ulen += c >= 0x80 ? uni_repl_len : 1;`
			`}`

			`if(NULL == (dest = INT123_safer_realloc(dest, ulen)))`
			`return -1;`

			`unsigned char p = (unsigned char)*dest;`
			`for(size_t i=0; i<count; ++i)`
			`{`
			`unsigned char c = ((unsigned char*)source)[i];`
			`if(!c)`
			`break;`
			`if(c >= 0x80)`
			`{`
			`for(int r=0; r<uni_repl_len; ++r)`
			`*p++ = uni_repl[r];`
			`}`
			`else`
			`*p++ = c;`
			`}`
			`*p = 0;`
			`return 0;`
			`}`

			`static void ascii_space(unsigned char c, int wasspace)`
			`{`
			`switch(*c)`
			`{`
			`case '\f':`
			`case '\r':`
			`case '\n':`
			`case '\t':`
			`case '\v':`
			`if(!*wasspace)`
			`*c = ' '; // Will be dropped by < 0x20 check otherwise.`
			`*wasspace = 1;`
			`break;`
			`default:`
			`*wasspace = 0;`
			`}`
			`}`

			`// Filter C1 control chars, using c2lead state.`
			`#define ASCII_C1(c, append) \`
			`if(c2lead) \`
			`{ \`
			`if((c) >= 0x80 && (c) <= 0x9f) \`
			`{ \`
			`c2lead = 0; \`
			`continue; \`
			`} \`
			`else \`
			`{ \`
			`append; \`
			`} \`
			`} \`
			`c2lead = ((c) == 0xc2); \`
			`if(c2lead) \`
			`continue;`

			`// return: strlen+1 of result, 0 on error`
			`// If ret is 0, *dest will be freed and NULL.`

			`static size_t utf8_ascii_work(char *dest_, const char source`
			`, int keep_nonprint)`
			`{`
			`if(!dest_)`
			`return 0;`
			`if(!source)`
			`{`
			`dest_ = INT123_safer_realloc(dest_, 0);`
			`return 0;`
			`}`

			`char dest = dest_;`
			`size_t source_fill = strlen(source)+1;`
			`size_t spos = 0;`
			`size_t dlen = 1; // At least a zero.`
			`unsigned char *p;`

			`// Find length of ASCII string (count non-continuation bytes).`
			`// Do _not_ change this to mpg123_strlen()!`
			`// It needs to match the loop below.`
			`// No UTF-8 continuation byte 0x10??????, nor control char.`
			`#define ASCII_PRINT_SOMETHING(c) \`
			`(((c) & 0xc0) != 0x80 && (keep_nonprint \|\| ((c) != 0x7f && (c) >= 0x20)))`
			`int c2lead = 0;`
			`int wasspace = 0;`
			`for(spos=0; spos < source_fill; ++spos)`
			`{`
			`unsigned char c = ((unsigned char*)source)[spos];`
			`if(!keep_nonprint)`
			`ascii_space(&c, &wasspace);`
			`ASCII_C1(c, ++dlen);`
			`if(ASCII_PRINT_SOMETHING(c))`
			`++dlen;`
			`}`
			`// Do nothing with nothing or if allocation fails. Neatly catches overflow`
			`// of ++dlen.`
			`if(!dlen \|\| !(dest=INT123_safer_realloc(dest, dlen)))`
			`goto utf8_ascii_bad;`
			`p = (unsigned char*)dest;`
			`c2lead = 0;`
			`wasspace = 0;`
			`for(spos=0; spos < source_fill; ++spos)`
			`{`
			`unsigned char c = ((unsigned char*)source)[spos];`
			`if(!keep_nonprint)`
			`ascii_space(&c, &wasspace);`
			`ASCII_C1(c, *p++ = joker_symbol)`
			`if(!ASCII_PRINT_SOMETHING(c))`
			`continue;`
			`else if(c & 0x80) // UTF-8 lead byte 0x11??????`
			`c = joker_symbol;`
			`*p++ = c;`
			`}`
			`#undef ASCII_PRINT_SOMETHING`
			`// Always close the string.`
			`if(dlen)`
			`dest[dlen-1] = 0;`
			`goto utf8_ascii_end;`
			`utf8_ascii_bad:`
			`dest = INT123_safer_realloc(dest, 0);`
			`utf8_ascii_end:`
			`*dest_ = dest;`
			`return dest ? strlen(dest)+1 : 0;`
			`}`

			`// Reduce UTF-8 data to 7-bit ASCII, dropping non-printable characters.`
			`// Non-printable ASCII == everything below 0x20 (space), including`
			`// line breaks.`
			`// Also: 0x7f (DEL) and the C1 chars. The C0 and C1 chars should just be`
			`// dropped, not rendered. Or should they?`
			`static size_t utf8_ascii_print(char *dest, const char source)`
			`{`
			`return utf8_ascii_work(dest, source, 0);`
			`}`

			`// Same as above, but keeping non-printable and control chars in the`
			`// 7 bit realm.`
			`static size_t utf8_ascii(char *dest, const char source)`
			`{`
			`return utf8_ascii_work(dest, source, 1);`
			`}`

			`size_t utf8outstr(char *dest_, const char source, int to_terminal)`
			`{`
			`if(!dest_)`
			`return 0;`
			`if(!source)`
			`{`
			`dest_ = INT123_safer_realloc(dest_, 0);`
			`return 0;`
			`}`
			`char dest = dest_;`
			`size_t width = 0;`
			`size_t source_fill = strlen(source)+1;`

			`if(utf8env)`
			`{`
			`#if defined(HAVE_MBSTOWCS) && defined(HAVE_WCSWIDTH) && \`
			`defined(HAVE_ISWPRINT) && defined(HAVE_WCSTOMBS)`
			`if(utf8loc && to_terminal)`
			`{`
			`// Best case scenario: Convert to wide string, filter,`
			`// compute printing width.`
			`size_t wcharlen = mbstowcs(NULL, source, 0);`
			`if(wcharlen == (size_t)-1)`
			`goto utf8outstr_bad;`
			`if(wcharlen+1 > SIZE_MAX/sizeof(wchar_t))`
			`goto utf8outstr_bad;`
			`wchar_t pre = malloc(sizeof(wchar_t)(wcharlen+1));`
			`wchar_t flt = malloc(sizeof(wchar_t)(wcharlen+1));`
			`if(!pre \|\| !flt)`
			`{`
			`free(flt);`
			`free(pre);`
			`goto utf8outstr_bad;`
			`}`
			`if(mbstowcs(pre, source, wcharlen+1) == wcharlen)`
			`{`
			`size_t nwl = 0;`
			`int wasspace = 0;`
			`for(size_t i=0; i<wcharlen; ++i)`
			`{`
			`// Turn any funky space sequence (including line breaks) into`
			`// one normal space.`
			`if(iswspace(pre[i]) && pre[i] != ' ')`
			`{`
			`if(!wasspace)`
			`flt[nwl++] = ' ';`
			`wasspace = 1;`
			`} else // Anything non-printing is skipped.`
			`{`
			`if(iswprint(pre[i]))`
			`flt[nwl++] = pre[i];`
			`wasspace = 0;`
			`}`
			`}`
			`flt[nwl] = 0;`
			`int columns = wcswidth(flt, nwl);`
			`size_t bytelen = wcstombs(NULL, flt, 0);`
			`if(`
			`columns >= 0 && bytelen != (size_t)-1`
			`&& (dest=INT123_safer_realloc(dest, bytelen+1))`
			`&& wcstombs(dest, flt, bytelen+1) == bytelen`
			`){`
			`width = columns;`
			`}`
			`else`
			`dest=INT123_safer_realloc(dest, 0);`
			`}`
			`free(flt);`
			`free(pre);`
			`}`
			`else`
			`#endif`
			`if(to_terminal)`
			`{`
			`// Only filter C0 and C1 control characters.`
			`// That is, 0x01 to 0x19 (keeping 0x20, space) and 0x7f (DEL) to 0x9f.`
			`// Since the input and output is UTF-8, we'll keep that intact.`
			`// C1 is mapped to 0xc280 till 0xc29f.`
			`dest = INT123_safer_realloc(dest, source_fill);`
			`if(!dest)`
			`goto utf8outstr_bad;`
			`size_t dest_fill = 0;`
			`int c2lead = 0;`
			`int wasspace = 0;`
			`unsigned char p = (unsigned char)dest;`
			`for(size_t i=0; i<source_fill; ++i)`
			`{`
			`unsigned char c = ((unsigned char*)source)[i];`
			`ascii_space(&c, &wasspace);`
			`ASCII_C1(c, *p++ = 0xc2)`
			`if(c && c < 0x20)`
			`continue; // no C0 control chars, except space`
			`if(c == 0x7f)`
			`continue; // also no DEL`
			`*p++ = c;`
			`if(!c)`
			`break; // Up to zero is enough.`
			`// Assume each 7 bit char and each sequence start make one character.`
			`// So only continuation bytes need to be ignored.`
			`if((c & 0xc0) != 0x80)`
			`++width;`
			`}`
			`// Make damn sure that it ends.`
			`dest_fill = (char*)p - dest;`
			`dest[dest_fill-1] = 0;`
			`} else`
			`{`
			`dest = INT123_safer_realloc(dest, source_fill);`
			`if(!dest)`
			`goto utf8outstr_bad;`
			`size_t dest_fill = 0;`
			`unsigned char p = (unsigned char)dest;`
			`for(size_t i=0; i<source_fill; ++i)`
			`{`
			`unsigned char c = ((unsigned char*)source)[i];`
			`*p++ = c;`
			`if(!c)`
			`break; // Up to zero is enough.`
			`// Actual width should not matter that much for non-terminal,`
			`// as we should use less formatting in that case, but anyway.`
			`if((c & 0xc0) != 0x80)`
			`++width;`
			`}`
			`dest_fill = (char*)p - dest;`
			`dest[dest_fill-1] = 0;`
			`}`
			`} else if(to_terminal)`
			`{`
			`// Last resort: just 7-bit ASCII.`
			`width = to_terminal`
			`? utf8_ascii_print(&dest, source)`
			`: utf8_ascii(&dest, source);`
			`if(!width)`
			`goto utf8outstr_bad;`
			`--width;`
			`}`

			`goto utf8outstr_end;`
			`utf8outstr_bad:`
			`dest = INT123_safer_realloc(dest, 0);`
			`width = 0;`
			`utf8outstr_end:`
			`*dest_ = dest;`
			`return width;`
			`}`

			`#undef ASCII_C1`

			`// I tried saving some malloc using provided work buffers, but`
			`// realized that the path of Unicode transformations is so full`
			`// of them regardless.`
			`// Can this include all the necessary logic?`
			`// - If UTF-8 input: Use utf8outstr(), which includes terminal switch.`
			`// - If not:`
			`// -- If terminal: construct safe UTF-8, pass on to outstr().`
			`// -- If not: assume env encoding, unprocessed string that came`
			`// from the environment.`

			`int outstr(char *dest, const char str, int is_utf8, int is_term)`
			`{`
			`if(!dest)`
			`return -1;`
			`if(!str)`
			`{`
			`dest = INT123_safer_realloc(dest, 0);`
			`return -1;`
			`}`
			`int ret = 0;`
			`if(is_utf8 \|\| utf8env)`
			`{`
			`utf8outstr(dest, str, is_term);`
			`if(*dest)`
			`ret = -1;`
			`} else if(is_term)`
			`{`
			`char *usrc = NULL;`
			`ret = unknown2utf8(&usrc, str, -1);`
			`if(!ret)`
			`{`
			`utf8outstr(dest, usrc, is_term);`
			`if(!*dest)`
			`ret = -1;`
			`}`
			`free(usrc);`
			`} else`
			`{`
			`*dest = INT123_compat_strdup(str);`
			`if(!*dest)`
			`ret = -1;`
			`}`
			`return ret;`
			`}`

			`int print_outstr(FILE out, const char str, int is_utf8, int is_term)`
			`{`
			`int ret = 0;`
			`if(!str)`
			`return -1;`
			`char *outbuf = NULL;`
			`ret = outstr(&outbuf, str, is_utf8, is_term);`
			`if(outbuf)`
			`{`
			`ret = fprintf(out, "%s", outbuf);`
			`free(outbuf);`
			`}`
			`return ret;`
			`}`