SDL  2.0
SDL_iconv.c File Reference
#include "../SDL_internal.h"
#include "SDL_stdinc.h"
#include "SDL_endian.h"
+ Include dependency graph for SDL_iconv.c:

Go to the source code of this file.

Data Structures

struct  SDL_iconv_t

Macros

#define UNICODE_BOM   0xFEFF
#define UNKNOWN_ASCII   '?'
#define UNKNOWN_UNICODE   0xFFFD
#define ENCODING_UTF16NATIVE   ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE   ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE   ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE   ENCODING_UCS4BE

Enumerations

enum  {
  ENCODING_UNKNOWN,
  ENCODING_ASCII,
  ENCODING_LATIN1,
  ENCODING_UTF8,
  ENCODING_UTF16,
  ENCODING_UTF16BE,
  ENCODING_UTF16LE,
  ENCODING_UTF32,
  ENCODING_UTF32BE,
  ENCODING_UTF32LE,
  ENCODING_UCS2BE,
  ENCODING_UCS2LE,
  ENCODING_UCS4BE,
  ENCODING_UCS4LE
}

Functions

static const char * getlocale (char *buffer, size_t bufsize)
SDL_iconv_t SDL_iconv_open (const char *tocode, const char *fromcode)
size_t SDL_iconv (SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
int SDL_iconv_close (SDL_iconv_t cd)
char * SDL_iconv_string (const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)

Variables

struct {
   const char *   name
   int   format
} encodings []

Macro Definition Documentation

#define ENCODING_UCS2NATIVE   ENCODING_UCS2BE

Definition at line 120 of file SDL_iconv.c.

#define ENCODING_UCS4NATIVE   ENCODING_UCS4BE

Definition at line 121 of file SDL_iconv.c.

#define ENCODING_UTF16NATIVE   ENCODING_UTF16BE

Definition at line 118 of file SDL_iconv.c.

Referenced by SDL_iconv().

#define ENCODING_UTF32NATIVE   ENCODING_UTF32BE

Definition at line 119 of file SDL_iconv.c.

Referenced by SDL_iconv().

#define UNICODE_BOM   0xFEFF

Definition at line 95 of file SDL_iconv.c.

Referenced by SDL_iconv().

#define UNKNOWN_ASCII   '?'

Definition at line 97 of file SDL_iconv.c.

Referenced by SDL_iconv().

#define UNKNOWN_UNICODE   0xFFFD

Definition at line 98 of file SDL_iconv.c.

Referenced by SDL_iconv().

Enumeration Type Documentation

anonymous enum
Enumerator:
ENCODING_UNKNOWN 
ENCODING_ASCII 
ENCODING_LATIN1 
ENCODING_UTF8 
ENCODING_UTF16 
ENCODING_UTF16BE 
ENCODING_UTF16LE 
ENCODING_UTF32 
ENCODING_UTF32BE 
ENCODING_UTF32LE 
ENCODING_UCS2BE 
ENCODING_UCS2LE 
ENCODING_UCS4BE 
ENCODING_UCS4LE 

Definition at line 100 of file SDL_iconv.c.

Function Documentation

static const char* getlocale ( char *  buffer,
size_t  bufsize 
)
static

Definition at line 173 of file SDL_iconv.c.

References NULL, SDL_getenv, SDL_strchr, SDL_strcmp, and SDL_strlcpy.

Referenced by SDL_iconv_open().

{
    /*
     * Derive a character-set name from the environment and store it in
     * `buffer` (at most `bufsize` bytes, always NUL-terminated by
     * SDL_strlcpy when bufsize > 0).
     *
     * The variables LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in
     * that order; the first one that is set wins. An unset or empty value,
     * or the "C" locale, is treated as "ASCII".
     *
     * Returns `buffer`.
     */
    const char *lang;
    char *ptr;

    lang = SDL_getenv("LC_ALL");
    if (!lang) {
        lang = SDL_getenv("LC_CTYPE");
    }
    if (!lang) {
        lang = SDL_getenv("LC_MESSAGES");
    }
    if (!lang) {
        lang = SDL_getenv("LANG");
    }
    if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    ptr = SDL_strchr(lang, '.');
    if (ptr != NULL) {
        lang = ptr + 1;
    }

    /* Copy into the caller's buffer first: the environment string must not
       be modified, and the '@' search below operates on the copy. */
    SDL_strlcpy(buffer, lang, bufsize);
    ptr = SDL_strchr(buffer, '@');
    if (ptr != NULL) {
        *ptr = '\0';            /* chop end of string. */
    }

    return buffer;
}
size_t SDL_iconv ( SDL_iconv_t  cd,
const char **  inbuf,
size_t *  inbytesleft,
char **  outbuf,
size_t *  outbytesleft 
)

Definition at line 248 of file SDL_iconv.c.

References ENCODING_ASCII, ENCODING_LATIN1, ENCODING_UCS2BE, ENCODING_UCS2LE, ENCODING_UCS4BE, ENCODING_UCS4LE, ENCODING_UTF16, ENCODING_UTF16BE, ENCODING_UTF16LE, ENCODING_UTF16NATIVE, ENCODING_UTF32, ENCODING_UTF32BE, ENCODING_UTF32LE, ENCODING_UTF32NATIVE, ENCODING_UTF8, SDL_FALSE, SDL_ICONV_E2BIG, SDL_ICONV_EINVAL, SDL_TRUE, UNICODE_BOM, UNKNOWN_ASCII, and UNKNOWN_UNICODE.

{
    /*
     * Convert characters from *inbuf to *outbuf according to the source and
     * destination formats stored in `cd`. For simplicity, we'll convert
     * everything to and from UCS-4: each loop iteration decodes one
     * character into `ch` and then encodes it.
     *
     * After every successfully converted character *inbuf, *inbytesleft,
     * *outbuf and *outbytesleft are updated, so on an error return they
     * describe the state after the last complete character.
     *
     * Returns the number of characters converted, or
     *   SDL_ICONV_E2BIG  - the output buffer is missing or too small,
     *   SDL_ICONV_EINVAL - the input ends in the middle of a character.
     * Illegal input sequences are not reported; they are replaced with
     * UNKNOWN_UNICODE (or UNKNOWN_ASCII for 8-bit destinations).
     */
    const char *src;
    char *dst;
    size_t srclen, dstlen;
    Uint32 ch = 0;
    size_t total;

    if (!inbuf || !*inbuf) {
        /* Reset the context */
        return 0;
    }
    if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
        return SDL_ICONV_E2BIG;
    }
    src = *inbuf;
    srclen = (inbytesleft ? *inbytesleft : 0);
    dst = *outbuf;
    dstlen = *outbytesleft;

    /* Resolve a byte-order-neutral source format to a concrete one. */
    switch (cd->src_fmt) {
    case ENCODING_UTF16:
        /* Scan for a byte order marker */
        {
            Uint8 *p = (Uint8 *) src;
            size_t n = srclen / 2;
            while (n) {
                /* U+FEFF serialized big-endian is FE FF; FF FE is the
                   little-endian form. */
                if (p[0] == 0xFE && p[1] == 0xFF) {
                    cd->src_fmt = ENCODING_UTF16BE;
                    break;
                } else if (p[0] == 0xFF && p[1] == 0xFE) {
                    cd->src_fmt = ENCODING_UTF16LE;
                    break;
                }
                p += 2;
                --n;
            }
            if (n == 0) {
                /* We can't tell, default to host order */
                cd->src_fmt = ENCODING_UTF16NATIVE;
            }
        }
        break;
    case ENCODING_UTF32:
        /* Scan for a byte order marker */
        {
            Uint8 *p = (Uint8 *) src;
            size_t n = srclen / 4;
            while (n) {
                /* U+0000FEFF big-endian is 00 00 FE FF; FF FE 00 00 is the
                   little-endian form. */
                if (p[0] == 0x00 && p[1] == 0x00 &&
                    p[2] == 0xFE && p[3] == 0xFF) {
                    cd->src_fmt = ENCODING_UTF32BE;
                    break;
                } else if (p[0] == 0xFF && p[1] == 0xFE &&
                           p[2] == 0x00 && p[3] == 0x00) {
                    cd->src_fmt = ENCODING_UTF32LE;
                    break;
                }
                p += 4;
                --n;
            }
            if (n == 0) {
                /* We can't tell, default to host order */
                cd->src_fmt = ENCODING_UTF32NATIVE;
            }
        }
        break;
    }

    /* Resolve a byte-order-neutral destination format and emit its BOM. */
    switch (cd->dst_fmt) {
    case ENCODING_UTF16:
        /* Default to host order, need to add byte order marker */
        if (dstlen < 2) {
            return SDL_ICONV_E2BIG;
        }
        *(Uint16 *) dst = UNICODE_BOM;
        dst += 2;
        dstlen -= 2;
        cd->dst_fmt = ENCODING_UTF16NATIVE;
        break;
    case ENCODING_UTF32:
        /* Default to host order, need to add byte order marker */
        if (dstlen < 4) {
            return SDL_ICONV_E2BIG;
        }
        *(Uint32 *) dst = UNICODE_BOM;
        dst += 4;
        dstlen -= 4;
        cd->dst_fmt = ENCODING_UTF32NATIVE;
        break;
    }

    total = 0;
    while (srclen > 0) {
        /* Decode a character */
        switch (cd->src_fmt) {
        case ENCODING_ASCII:
            {
                Uint8 *p = (Uint8 *) src;
                ch = (Uint32) (p[0] & 0x7F);
                ++src;
                --srclen;
            }
            break;
        case ENCODING_LATIN1:
            {
                Uint8 *p = (Uint8 *) src;
                ch = (Uint32) p[0];
                ++src;
                --srclen;
            }
            break;
        case ENCODING_UTF8:    /* RFC 3629 */
            {
                Uint8 *p = (Uint8 *) src;
                size_t left = 0;    /* continuation bytes still expected */
                SDL_bool overlong = SDL_FALSE;
                if (p[0] >= 0xFC) {
                    if ((p[0] & 0xFE) != 0xFC) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
                            overlong = SDL_TRUE;
                        }
                        ch = (Uint32) (p[0] & 0x01);
                        left = 5;
                    }
                } else if (p[0] >= 0xF8) {
                    if ((p[0] & 0xFC) != 0xF8) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
                            overlong = SDL_TRUE;
                        }
                        ch = (Uint32) (p[0] & 0x03);
                        left = 4;
                    }
                } else if (p[0] >= 0xF0) {
                    if ((p[0] & 0xF8) != 0xF0) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
                            overlong = SDL_TRUE;
                        }
                        ch = (Uint32) (p[0] & 0x07);
                        left = 3;
                    }
                } else if (p[0] >= 0xE0) {
                    if ((p[0] & 0xF0) != 0xE0) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
                            overlong = SDL_TRUE;
                        }
                        ch = (Uint32) (p[0] & 0x0F);
                        left = 2;
                    }
                } else if (p[0] >= 0xC0) {
                    if ((p[0] & 0xE0) != 0xC0) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        if ((p[0] & 0xDE) == 0xC0) {
                            overlong = SDL_TRUE;
                        }
                        ch = (Uint32) (p[0] & 0x1F);
                        left = 1;
                    }
                } else {
                    if ((p[0] & 0x80) != 0x00) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                    } else {
                        ch = (Uint32) p[0];
                    }
                }
                ++src;
                --srclen;
                if (srclen < left) {
                    /* Input ends in the middle of a multi-byte sequence */
                    return SDL_ICONV_EINVAL;
                }
                while (left--) {
                    ++p;
                    if ((p[0] & 0xC0) != 0x80) {
                        /* Skip illegal sequences
                           return SDL_ICONV_EILSEQ;
                         */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    ch <<= 6;
                    ch |= (p[0] & 0x3F);
                    ++src;
                    --srclen;
                }
                if (overlong) {
                    /* Potential security risk
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                }
                if ((ch >= 0xD800 && ch <= 0xDFFF) ||
                    (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
                    /* Skip illegal sequences
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                }
            }
            break;
        case ENCODING_UTF16BE: /* RFC 2781 */
            {
                Uint8 *p = (Uint8 *) src;
                Uint16 W1, W2;
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
                src += 2;
                srclen -= 2;
                if (W1 < 0xD800 || W1 > 0xDFFF) {
                    /* Not a surrogate: the code unit is the character */
                    ch = (Uint32) W1;
                    break;
                }
                if (W1 > 0xDBFF) {
                    /* Skip illegal sequences
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                p = (Uint8 *) src;
                W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
                src += 2;
                srclen -= 2;
                if (W2 < 0xDC00 || W2 > 0xDFFF) {
                    /* Skip illegal sequences
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                ch = (((Uint32) (W1 & 0x3FF) << 10) |
                      (Uint32) (W2 & 0x3FF)) + 0x10000;
            }
            break;
        case ENCODING_UTF16LE: /* RFC 2781 */
            {
                Uint8 *p = (Uint8 *) src;
                Uint16 W1, W2;
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
                src += 2;
                srclen -= 2;
                if (W1 < 0xD800 || W1 > 0xDFFF) {
                    /* Not a surrogate: the code unit is the character */
                    ch = (Uint32) W1;
                    break;
                }
                if (W1 > 0xDBFF) {
                    /* Skip illegal sequences
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                p = (Uint8 *) src;
                W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
                src += 2;
                srclen -= 2;
                if (W2 < 0xDC00 || W2 > 0xDFFF) {
                    /* Skip illegal sequences
                       return SDL_ICONV_EILSEQ;
                     */
                    ch = UNKNOWN_UNICODE;
                    break;
                }
                ch = (((Uint32) (W1 & 0x3FF) << 10) |
                      (Uint32) (W2 & 0x3FF)) + 0x10000;
            }
            break;
        case ENCODING_UCS2LE:
            {
                Uint8 *p = (Uint8 *) src;
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
                src += 2;
                srclen -= 2;
            }
            break;
        case ENCODING_UCS2BE:
            {
                Uint8 *p = (Uint8 *) src;
                if (srclen < 2) {
                    return SDL_ICONV_EINVAL;
                }
                ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
                src += 2;
                srclen -= 2;
            }
            break;
        case ENCODING_UCS4BE:
        case ENCODING_UTF32BE:
            {
                Uint8 *p = (Uint8 *) src;
                if (srclen < 4) {
                    return SDL_ICONV_EINVAL;
                }
                ch = ((Uint32) p[0] << 24) |
                    ((Uint32) p[1] << 16) |
                    ((Uint32) p[2] << 8) | (Uint32) p[3];
                src += 4;
                srclen -= 4;
            }
            break;
        case ENCODING_UCS4LE:
        case ENCODING_UTF32LE:
            {
                Uint8 *p = (Uint8 *) src;
                if (srclen < 4) {
                    return SDL_ICONV_EINVAL;
                }
                ch = ((Uint32) p[3] << 24) |
                    ((Uint32) p[2] << 16) |
                    ((Uint32) p[1] << 8) | (Uint32) p[0];
                src += 4;
                srclen -= 4;
            }
            break;
        }

        /* Encode a character */
        switch (cd->dst_fmt) {
        case ENCODING_ASCII:
            {
                Uint8 *p = (Uint8 *) dst;
                if (dstlen < 1) {
                    return SDL_ICONV_E2BIG;
                }
                if (ch > 0x7F) {
                    *p = UNKNOWN_ASCII;
                } else {
                    *p = (Uint8) ch;
                }
                ++dst;
                --dstlen;
            }
            break;
        case ENCODING_LATIN1:
            {
                Uint8 *p = (Uint8 *) dst;
                if (dstlen < 1) {
                    return SDL_ICONV_E2BIG;
                }
                if (ch > 0xFF) {
                    *p = UNKNOWN_ASCII;
                } else {
                    *p = (Uint8) ch;
                }
                ++dst;
                --dstlen;
            }
            break;
        case ENCODING_UTF8:    /* RFC 3629 */
            {
                Uint8 *p = (Uint8 *) dst;
                if (ch > 0x10FFFF) {
                    ch = UNKNOWN_UNICODE;
                }
                if (ch <= 0x7F) {
                    if (dstlen < 1) {
                        return SDL_ICONV_E2BIG;
                    }
                    *p = (Uint8) ch;
                    ++dst;
                    --dstlen;
                } else if (ch <= 0x7FF) {
                    if (dstlen < 2) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
                    p[1] = 0x80 | (Uint8) (ch & 0x3F);
                    dst += 2;
                    dstlen -= 2;
                } else if (ch <= 0xFFFF) {
                    if (dstlen < 3) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
                    p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
                    p[2] = 0x80 | (Uint8) (ch & 0x3F);
                    dst += 3;
                    dstlen -= 3;
                } else if (ch <= 0x1FFFFF) {
                    if (dstlen < 4) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
                    p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
                    p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
                    p[3] = 0x80 | (Uint8) (ch & 0x3F);
                    dst += 4;
                    dstlen -= 4;
                } else if (ch <= 0x3FFFFFF) {
                    if (dstlen < 5) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
                    p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
                    p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
                    p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
                    p[4] = 0x80 | (Uint8) (ch & 0x3F);
                    dst += 5;
                    dstlen -= 5;
                } else {
                    if (dstlen < 6) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
                    p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
                    p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
                    p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
                    p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
                    p[5] = 0x80 | (Uint8) (ch & 0x3F);
                    dst += 6;
                    dstlen -= 6;
                }
            }
            break;
        case ENCODING_UTF16BE: /* RFC 2781 */
            {
                Uint8 *p = (Uint8 *) dst;
                if (ch > 0x10FFFF) {
                    ch = UNKNOWN_UNICODE;
                }
                if (ch < 0x10000) {
                    if (dstlen < 2) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = (Uint8) (ch >> 8);
                    p[1] = (Uint8) ch;
                    dst += 2;
                    dstlen -= 2;
                } else {
                    /* Needs a surrogate pair */
                    Uint16 W1, W2;
                    if (dstlen < 4) {
                        return SDL_ICONV_E2BIG;
                    }
                    ch = ch - 0x10000;
                    W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
                    W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
                    p[0] = (Uint8) (W1 >> 8);
                    p[1] = (Uint8) W1;
                    p[2] = (Uint8) (W2 >> 8);
                    p[3] = (Uint8) W2;
                    dst += 4;
                    dstlen -= 4;
                }
            }
            break;
        case ENCODING_UTF16LE: /* RFC 2781 */
            {
                Uint8 *p = (Uint8 *) dst;
                if (ch > 0x10FFFF) {
                    ch = UNKNOWN_UNICODE;
                }
                if (ch < 0x10000) {
                    if (dstlen < 2) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[1] = (Uint8) (ch >> 8);
                    p[0] = (Uint8) ch;
                    dst += 2;
                    dstlen -= 2;
                } else {
                    /* Needs a surrogate pair */
                    Uint16 W1, W2;
                    if (dstlen < 4) {
                        return SDL_ICONV_E2BIG;
                    }
                    ch = ch - 0x10000;
                    W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
                    W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
                    p[1] = (Uint8) (W1 >> 8);
                    p[0] = (Uint8) W1;
                    p[3] = (Uint8) (W2 >> 8);
                    p[2] = (Uint8) W2;
                    dst += 4;
                    dstlen -= 4;
                }
            }
            break;
        case ENCODING_UCS2BE:
            {
                Uint8 *p = (Uint8 *) dst;
                if (ch > 0xFFFF) {
                    ch = UNKNOWN_UNICODE;
                }
                if (dstlen < 2) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = (Uint8) (ch >> 8);
                p[1] = (Uint8) ch;
                dst += 2;
                dstlen -= 2;
            }
            break;
        case ENCODING_UCS2LE:
            {
                Uint8 *p = (Uint8 *) dst;
                if (ch > 0xFFFF) {
                    ch = UNKNOWN_UNICODE;
                }
                if (dstlen < 2) {
                    return SDL_ICONV_E2BIG;
                }
                p[1] = (Uint8) (ch >> 8);
                p[0] = (Uint8) ch;
                dst += 2;
                dstlen -= 2;
            }
            break;
        case ENCODING_UTF32BE:
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            /* fallthrough */
        case ENCODING_UCS4BE:
            if (ch > 0x7FFFFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            {
                Uint8 *p = (Uint8 *) dst;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                p[0] = (Uint8) (ch >> 24);
                p[1] = (Uint8) (ch >> 16);
                p[2] = (Uint8) (ch >> 8);
                p[3] = (Uint8) ch;
                dst += 4;
                dstlen -= 4;
            }
            break;
        case ENCODING_UTF32LE:
            if (ch > 0x10FFFF) {
                ch = UNKNOWN_UNICODE;
            }
            /* fallthrough */
        case ENCODING_UCS4LE:
            if (ch > 0x7FFFFFFF) {
                ch = UNKNOWN_UNICODE;
            }
            {
                Uint8 *p = (Uint8 *) dst;
                if (dstlen < 4) {
                    return SDL_ICONV_E2BIG;
                }
                p[3] = (Uint8) (ch >> 24);
                p[2] = (Uint8) (ch >> 16);
                p[1] = (Uint8) (ch >> 8);
                p[0] = (Uint8) ch;
                dst += 4;
                dstlen -= 4;
            }
            break;
        }

        /* Update state */
        *inbuf = src;
        *inbytesleft = srclen;
        *outbuf = dst;
        *outbytesleft = dstlen;
        ++total;
    }
    return total;
}
int SDL_iconv_close ( SDL_iconv_t  cd)

Definition at line 854 of file SDL_iconv.c.

References SDL_free.

{
    /*
     * Release a conversion descriptor. The value (SDL_iconv_t)-1 is the
     * failure sentinel returned by SDL_iconv_open() and owns no memory, so
     * it is skipped. Always returns 0.
     */
    if (cd == (SDL_iconv_t)-1) {
        return 0;
    }
    SDL_free(cd);
    return 0;
}
SDL_iconv_t SDL_iconv_open ( const char *  tocode,
const char *  fromcode 
)

Definition at line 208 of file SDL_iconv.c.

References ENCODING_UNKNOWN, encodings, getlocale(), i, SDL_arraysize, SDL_malloc, and SDL_strcasecmp.

{
    /*
     * Create a conversion descriptor translating `fromcode` into `tocode`.
     * A NULL or empty encoding name is replaced by the locale's character
     * set as reported by getlocale(). Both names are looked up
     * case-insensitively in the `encodings` table.
     *
     * Returns the new descriptor, or (SDL_iconv_t)-1 when either encoding
     * is not in the table or the allocation fails.
     */
    char from_locale[64];
    char to_locale[64];
    int from_fmt = ENCODING_UNKNOWN;
    int to_fmt = ENCODING_UNKNOWN;
    int idx;
    SDL_iconv_t cd;

    if (!fromcode || !*fromcode) {
        fromcode = getlocale(from_locale, sizeof(from_locale));
    }
    if (!tocode || !*tocode) {
        tocode = getlocale(to_locale, sizeof(to_locale));
    }

    /* One pass over the table resolves both names; stop as soon as the
       second of the two has been found. */
    for (idx = 0; idx < SDL_arraysize(encodings); ++idx) {
        if (SDL_strcasecmp(fromcode, encodings[idx].name) == 0) {
            from_fmt = encodings[idx].format;
            if (to_fmt != ENCODING_UNKNOWN) {
                break;
            }
        }
        if (SDL_strcasecmp(tocode, encodings[idx].name) == 0) {
            to_fmt = encodings[idx].format;
            if (from_fmt != ENCODING_UNKNOWN) {
                break;
            }
        }
    }

    if (from_fmt == ENCODING_UNKNOWN || to_fmt == ENCODING_UNKNOWN) {
        return (SDL_iconv_t) - 1;
    }

    cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
    if (!cd) {
        return (SDL_iconv_t) - 1;
    }
    cd->src_fmt = from_fmt;
    cd->dst_fmt = to_fmt;
    return cd;
}
char* SDL_iconv_string ( const char *  tocode,
const char *  fromcode,
const char *  inbuf,
size_t  inbytesleft 
)

This function converts a string between encodings in one pass, returning a string that must be freed with SDL_free() or NULL on error.

Definition at line 865 of file SDL_iconv.c.

References NULL, SDL_iconv, SDL_iconv_close, SDL_ICONV_E2BIG, SDL_ICONV_EILSEQ, SDL_ICONV_EINVAL, SDL_ICONV_ERROR, SDL_iconv_open, SDL_malloc, SDL_memset, and SDL_realloc.

{
    /*
     * Convert `inbytesleft` bytes at `inbuf` from `fromcode` to `tocode` in
     * one pass, growing the output buffer as needed.
     *
     * Returns a newly allocated buffer that the caller must release with
     * SDL_free(), or NULL if no converter could be opened or memory ran
     * out. The result is always followed by four zero bytes, enough to
     * terminate a string in any of the supported encodings.
     */
    SDL_iconv_t cd;
    char *string;
    size_t stringsize;          /* usable output bytes, excluding terminator */
    char *outbuf;
    size_t outbytesleft;
    size_t retCode = 0;
    const size_t terminator = 4;        /* widest code unit is 4 bytes */

    cd = SDL_iconv_open(tocode, fromcode);
    if (cd == (SDL_iconv_t) - 1) {
        /* See if we can recover here (fixes iconv on Solaris 11) */
        if (!tocode || !*tocode) {
            tocode = "UTF-8";
        }
        if (!fromcode || !*fromcode) {
            fromcode = "UTF-8";
        }
        cd = SDL_iconv_open(tocode, fromcode);
    }
    if (cd == (SDL_iconv_t) - 1) {
        return NULL;
    }

    stringsize = inbytesleft > 4 ? inbytesleft : 4;
    /* Reserve room for the terminator beyond the space handed to
       SDL_iconv(), so an exactly-filled buffer is still terminated. */
    string = SDL_malloc(stringsize + terminator);
    if (!string) {
        SDL_iconv_close(cd);
        return NULL;
    }
    outbuf = string;
    outbytesleft = stringsize;
    SDL_memset(outbuf, 0, terminator);

    while (inbytesleft > 0) {
        retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
        switch (retCode) {
        case SDL_ICONV_E2BIG:
            {
                /* Remember the write offset before reallocating: the old
                   pointer must not be used once the block has moved. */
                const size_t used = (size_t) (outbuf - string);
                char *grown;

                stringsize *= 2;
                grown = SDL_realloc(string, stringsize + terminator);
                if (!grown) {
                    /* The original block is still owned by us on failure */
                    SDL_free(string);
                    SDL_iconv_close(cd);
                    return NULL;
                }
                string = grown;
                outbuf = string + used;
                outbytesleft = stringsize - used;
                SDL_memset(outbuf, 0, terminator);
            }
            break;
        case SDL_ICONV_EILSEQ:
            /* Try skipping some input data - not perfect, but... */
            ++inbuf;
            --inbytesleft;
            break;
        case SDL_ICONV_EINVAL:
        case SDL_ICONV_ERROR:
            /* We can't continue... */
            inbytesleft = 0;
            break;
        }
    }

    /* Terminate at the final write position; the reserved bytes guarantee
       this stays inside the allocation. */
    SDL_memset(outbuf, 0, terminator);
    SDL_iconv_close(cd);

    return string;
}

Variable Documentation

struct { ... } encodings[]

Referenced by SDL_iconv_open().

int format

Definition at line 138 of file SDL_iconv.c.

const char* name

Definition at line 137 of file SDL_iconv.c.