//**************************************************************************
//*                     This file is part of the                           *
//*                      Mpxplay - audio player.                           *
//*                  The source code of Mpxplay is                         *
//*        (C) copyright 1998-2005 by PDSoft (Attila Padar)                *
//*                    http://mpxplay.cjb.net                              *
//*                  email: mpxplay@freemail.hu                            *
//**************************************************************************
//*  This program is distributed in the hope that it will be useful,       *
//*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
//*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
//*  Please contact with the author (with me) if you want to use           *
//*  or modify this source.                                                *
//**************************************************************************
//function:text (tag) conversion : codepage to codepage and UTF-8 decoding

#include <malloc.h>
#include "newfunc\newfunc.h"
#include "playlist.h"

extern unsigned int id3textconv;
extern char cp_winchars[256],cp_doschars[256];

//-------------------------------------------------------------------------
// Many entries of the table(s) are identical to the character code itself
// (as in US-ASCII), so each table begins at the first element that differs
// (see cp_maps[].begin).

// ISO-8859-2 character code -> 16-bit Unicode value.
// The table starts at character code 161 (0xA1); entry [n] belongs to the
// character code (161+n). Lower codes are not stored (see cp_maps[].begin).
// The trailing comments give the character code range of each row.
static unsigned short mapping_iso_8859_2[256-161] = {
	  0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // 0xA1-0xA7
  0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // 0xA8-0xAF
  0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // 0xB0-0xB7
  0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // 0xB8-0xBF
  0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // 0xC0-0xC7
  0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // 0xC8-0xCF
  0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // 0xD0-0xD7
  0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // 0xD8-0xDF
  0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // 0xE0-0xE7
  0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // 0xE8-0xEF
  0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // 0xF0-0xF7
  0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9 // 0xF8-0xFF
};

// Description of one selectable target codepage.
// cp_maps[] is searched by name in textconv_select_mapping(); the list ends
// at the entry whose name is NULL, and entry [0] is used when nothing matches.
static struct cp_map_s{
  char *name;            // codepage name, compared with textconv_codepage_selectname
  unsigned short *map;   // Unicode values of the character codes begin..255 (NULL if there are none)
  unsigned short begin;  // first character code stored in map; lower codes map to themselves
} cp_maps[] = {
  {"ISO-8859-2", mapping_iso_8859_2 ,161 }, // default target codepage
  {"ISO-8859-1", NULL,               256 }, // begin=256: map is never indexed, every code maps to itself
  {NULL,NULL}                               // end of list (begin is implicitly 0)
};

// Name of the requested target codepage (looked up in cp_maps[]); it is not
// assigned in this file. NULL selects cp_maps[0].
char *textconv_codepage_selectname;
// Lookup tables, allocated on first use by the playlist_textconv_init_*()
// functions and released by mpxplay_playlist_textconv_close().
static unsigned char *textconv_table_unicode_to_char; // 65536 entries: 16-bit Unicode value -> character ('?' if unmapped)
static unsigned char *textconv_table_cp_to_cp_IN;     // 256 entries: cp_winchars[i] -> cp_doschars[i]
static unsigned char *textconv_table_cp_to_cp_OUT;    // 256 entries: cp_doschars[i] -> cp_winchars[i]

// Selects the target codepage description.
// Returns the cp_maps[] entry whose name matches textconv_codepage_selectname
// (compared with pds_stricmp - presumably case-insensitive), or cp_maps[0]
// when no name is configured or no entry matches.
static struct cp_map_s *textconv_select_mapping(void)
{
 struct cp_map_s *entry;

 if(!textconv_codepage_selectname)
  return &cp_maps[0];

 // the list is terminated by an entry with a NULL name
 for(entry=&cp_maps[0];entry->name;entry++)
  if(pds_stricmp(entry->name,textconv_codepage_selectname)==0)
   return entry;

 return &cp_maps[0];
}

// Builds (once) the 256 entry input conversion table
// textconv_table_cp_to_cp_IN : cp_winchars[i] -> cp_doschars[i].
// Codes below 32 are replaced with a space, every other code without a
// cp_winchars entry is left unchanged.
// Returns 1 if the table is available, 0 if the allocation failed.
static unsigned int playlist_textconv_init_codepage_IN(void)
{
 unsigned int i,pairs;

 if(textconv_table_cp_to_cp_IN)
  return 1;
 textconv_table_cp_to_cp_IN=malloc(256);
 if(!textconv_table_cp_to_cp_IN)
  return 0;
 for(i=0;i<32;i++)
  textconv_table_cp_to_cp_IN[i]=32;
 for(i=32;i<256;i++)
  textconv_table_cp_to_cp_IN[i]=i;
 // the number of pairs does not change inside the loop, so compute it once
 pairs=pds_strlen(cp_winchars);
 // index through unsigned char: a plain (possibly signed) char above 0x7f
 // would give a negative index and write in front of the table
 for(i=0;i<pairs;i++)
  textconv_table_cp_to_cp_IN[(unsigned char)cp_winchars[i]]=(unsigned char)cp_doschars[i];
 return 1;
}

// Builds (once) the 256 entry output conversion table
// textconv_table_cp_to_cp_OUT : cp_doschars[i] -> cp_winchars[i]
// (the reverse direction of the input table).
// Codes below 32 are replaced with a space, every other code without a
// cp_doschars entry is left unchanged.
// Returns 1 if the table is available, 0 if the allocation failed.
static unsigned int playlist_textconv_init_codepage_OUT(void)
{
 unsigned int i,pairs;

 if(textconv_table_cp_to_cp_OUT)
  return 1;
 textconv_table_cp_to_cp_OUT=malloc(256);
 if(!textconv_table_cp_to_cp_OUT)
  return 0;
 for(i=0;i<32;i++)
  textconv_table_cp_to_cp_OUT[i]=32;
 for(i=32;i<256;i++)
  textconv_table_cp_to_cp_OUT[i]=i;
 // the number of pairs does not change inside the loop, so compute it once
 pairs=pds_strlen(cp_winchars);
 // index through unsigned char: a plain (possibly signed) char above 0x7f
 // would give a negative index and write in front of the table
 for(i=0;i<pairs;i++)
  textconv_table_cp_to_cp_OUT[(unsigned char)cp_doschars[i]]=(unsigned char)cp_winchars[i];
 return 1;
}

// Builds (once) the 65536 entry table textconv_table_unicode_to_char that
// converts a 16-bit Unicode value to a character of the selected codepage.
// Unmapped values give '?'. With ID3TEXTCONV_VALIDATE set, a character code
// is registered only if it is listed in cp_winchars (codes below 128 that lie
// under the mapping's begin are always registered).
// Returns 1 if the table is available, 0 if the allocation failed.
static unsigned int playlist_textconv_init_unicode_IN(void)
{
 struct cp_map_s *cpmap;
 unsigned char *table;
 unsigned int code,validate;

 if(textconv_table_unicode_to_char)
  return 1;

 table=malloc(65536*sizeof(*textconv_table_unicode_to_char));
 textconv_table_unicode_to_char=table;
 if(!table)
  return 0;

 pds_memset(table,'?',65536*sizeof(*textconv_table_unicode_to_char));

 cpmap=textconv_select_mapping();
 validate=funcbit_test(id3textconv,ID3TEXTCONV_VALIDATE);

 // codes below cpmap->begin : the Unicode value equals the character code
 for(code=0;code<cpmap->begin;code++){
  if(code>=128 && validate && !pds_strchr(cp_winchars,(int)code)) // to avoid invalid UTF-8 decodings
   continue;
  table[code]=code;
 }

 // codes from cpmap->begin up to 255 : the Unicode value comes from the map
 for(;code<256;code++){
  if(validate && !pds_strchr(cp_winchars,(int)code)) // to avoid invalid UTF-8 decodings
   continue;
  table[cpmap->map[code-cpmap->begin]]=code;
 }

 return 1;
}

// Prepares the conversion tables required by the conversions enabled in
// id3textconv. A conversion whose table cannot be allocated is switched off
// in id3textconv.
void mpxplay_playlist_textconv_init(void)
{
 // codepage -> codepage (input direction)
 if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE) && !playlist_textconv_init_codepage_IN())
  funcbit_disable(id3textconv,ID3TEXTCONV_CODEPAGE);

 // UTF-8 and UTF-16 share the Unicode -> character table
 if(funcbit_test(id3textconv,(ID3TEXTCONV_UTF8|ID3TEXTCONV_UTF16)) && !playlist_textconv_init_unicode_IN())
  funcbit_disable(id3textconv,(ID3TEXTCONV_UTF8|ID3TEXTCONV_UTF16));
}

// Releases all conversion tables.
// The pointers are reset to NULL because the init functions treat a non-NULL
// pointer as "table is ready": without the reset a conversion after close
// would use freed memory, and a second close would free the tables twice.
void mpxplay_playlist_textconv_close(void)
{
 free(textconv_table_cp_to_cp_IN); // free(NULL) is a no-op
 textconv_table_cp_to_cp_IN=NULL;

 free(textconv_table_cp_to_cp_OUT);
 textconv_table_cp_to_cp_OUT=NULL;

 free(textconv_table_unicode_to_char);
 textconv_table_unicode_to_char=NULL;
}

//-----------------------------------------------------------------------

// Little endian UTF-16 to char, converted in place.
//  str     : UTF-16LE data, overwritten with the converted characters
//  datalen : length of the data in bytes
// Returns the number of bytes written to str. Byte order marks are skipped;
// a terminating zero code unit (if found) is converted and counted too.
// Returns datalen without touching str if datalen<2 or the table is missing.
// Only complete 2 byte units are read: a trailing odd byte is ignored.
static unsigned int playlist_textconv_utf16_LE_to_char(unsigned char *str,unsigned int datalen)
{
 unsigned int index_in=0,index_out=0;
 unsigned int unicode;

 if(datalen<2)
  return datalen;
 if(!playlist_textconv_init_unicode_IN())
  return datalen;

 do{
  // assemble the unit from bytes: no unaligned 16-bit access and the result
  // does not depend on the byte order of the host
  unicode=(unsigned int)str[index_in]|((unsigned int)str[index_in+1]<<8);
  index_in+=2;
  if(unicode!=0xfffe && unicode!=0xfeff){
   str[index_out]=textconv_table_unicode_to_char[unicode];
   index_out++;
  }
 }while(unicode && ((datalen-index_in)>=2)); // index_in<=datalen always holds here
 return index_out;
}

// Big endian UTF-16 to char, converted in place.
//  str     : UTF-16BE data, overwritten with the converted characters
//  datalen : length of the data in bytes
// Returns the number of bytes written to str. Byte order marks are skipped;
// a terminating zero code unit (if found) is converted and counted too.
// Returns datalen without touching str if datalen<2 or the table is missing.
// Only complete 2 byte units are read: a trailing odd byte is ignored.
static unsigned int playlist_textconv_utf16_BE_to_char(unsigned char *str,unsigned int datalen)
{
 unsigned int index_in=0,index_out=0;
 unsigned int unicode;

 if(datalen<2)
  return datalen;
 if(!playlist_textconv_init_unicode_IN())
  return datalen;

 do{
  // assemble the unit from bytes (high byte first): no unaligned 16-bit
  // access and the result does not depend on the byte order of the host
  unicode=((unsigned int)str[index_in]<<8)|(unsigned int)str[index_in+1];
  index_in+=2;
  if(unicode!=0xfffe && unicode!=0xfeff){
   str[index_out]=textconv_table_unicode_to_char[unicode];
   index_out++;
  }
 }while(unicode && ((datalen-index_in)>=2)); // index_in<=datalen always holds here
 return index_out;
}

// UTF-8 to char, converted in place.
//  str     : text to convert; must have room for a terminating zero at
//            str[datalen] (the result is always zero terminated)
//  datalen : length of the text in bytes
// Returns the length of the converted text (without the terminator).
// Returns 0 without touching str if str is NULL or datalen is 0, and datalen
// without touching str if the Unicode table cannot be allocated.
//
// The input may be plain 8-bit text instead of UTF-8, so a 2 or 3 byte
// sequence is replaced only if it is well formed (valid lead byte, valid
// continuation bytes, completely inside datalen) and its code is mapped to a
// character >=0x80 in the target codepage. Everything else (including 4 byte
// sequences, which do not fit the 16-bit table) is copied byte by byte.
static unsigned int playlist_textconv_utf8_to_char(unsigned char *str,unsigned int datalen)
{
 unsigned int index_in=0,index_out=0;

 if(!str || !datalen)
  return 0;
 if(!playlist_textconv_init_unicode_IN())
  return datalen;

 while(index_in<datalen){
  const unsigned int avail=datalen-index_in; // bytes left, lead byte included
  const unsigned char c=str[index_in];
  unsigned int unicode=0,codesize=0;

  if(!c)
   break;

  if((c&0xf0)==0xe0){        // 1110xxxx 10xxxxxx 10xxxxxx
   if((avail>=3) && ((str[index_in+1]&0xc0)==0x80) && ((str[index_in+2]&0xc0)==0x80)){
    unicode =(unsigned int)(c&0x0f)<<12;
    unicode|=(unsigned int)(str[index_in+1]&0x3f)<<6;
    unicode|=(unsigned int)(str[index_in+2]&0x3f);
    codesize=3;
   }
  }else if((c&0xe0)==0xc0){  // 110xxxxx 10xxxxxx
   if((avail>=2) && ((str[index_in+1]&0xc0)==0x80)){
    unicode =(unsigned int)(c&0x1f)<<6;
    unicode|=(unsigned int)(str[index_in+1]&0x3f);
    codesize=2;
   }
  }

  if(codesize && (textconv_table_unicode_to_char[unicode]>=0x80)){ // we try to find out is this an UTF-8 or not
   str[index_out]=textconv_table_unicode_to_char[unicode];
   index_in+=codesize;
  }else{
   str[index_out]=c; // not (usable) UTF-8: keep the byte as it is
   index_in++;
  }
  index_out++; // index_out<=index_in, the output never overtakes the input
 }

 str[index_out]=0;
 return index_out;
}

// Codepage to codepage conversion (input direction), done in place with
// textconv_table_cp_to_cp_IN.
//  str     : text to convert
//  datalen : maximum number of bytes to convert
// Conversion stops at the terminating zero or after datalen bytes.
// Returns the number of converted bytes, or datalen (text untouched) if the
// table cannot be allocated.
static unsigned int playlist_textconv_codepage_to_codepage(unsigned char *str,unsigned int datalen)
{
 unsigned int len=0;

 if(!playlist_textconv_init_codepage_IN())
  return datalen;

 // test before converting: the terminator of an empty string must not be
 // translated (the table maps code 0 to a space) and nothing may be
 // touched when datalen is 0
 while((len<datalen) && str[len]){
  str[len]=textconv_table_cp_to_cp_IN[str[len]];
  len++;
 }
 return len;
}

// Runs the text conversions enabled in id3textconv on str (in place).
//  str      : text to convert
//  datalen  : length of the text in bytes
//  doneconv : ID3TEXTCONV_ bits of the conversions that must be skipped
// First one Unicode decoding is done (UTF-16, always as little endian,
// otherwise UTF-8), then the codepage conversion.
// Returns the new length of the text; 0 if a conversion is enabled but the
// text is missing or empty. If no conversion is enabled the text is not
// touched and datalen is returned (pds_strlen(str) if datalen is 0).
unsigned int mpxplay_playlist_textconv_do(char *str,unsigned int datalen,unsigned int doneconv)
{
 unsigned char *text=(unsigned char *)str;

 if(!funcbit_test(id3textconv,(ID3TEXTCONV_CODEPAGE|ID3TEXTCONV_UTF8|ID3TEXTCONV_UTF16))){
  if(!datalen)
   datalen=pds_strlen(str);
  return datalen;
 }

 if(!str || !str[0] || !datalen)
  return 0;

 if(funcbit_test(id3textconv,ID3TEXTCONV_UTF16) && !funcbit_test(doneconv,ID3TEXTCONV_UTF16))
  datalen=playlist_textconv_utf16_LE_to_char(text,datalen);
 else if(funcbit_test(id3textconv,ID3TEXTCONV_UTF8) && !funcbit_test(doneconv,ID3TEXTCONV_UTF8))
  datalen=playlist_textconv_utf8_to_char(text,datalen);

 if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE) && !funcbit_test(doneconv,ID3TEXTCONV_CODEPAGE))
  datalen=playlist_textconv_codepage_to_codepage(text,datalen);

 return datalen;
}

//----------------------------------------------------------------------
// codepage description used by playlist_textconv_char_to_utf8,
// selected at the first call and kept afterwards
static struct cp_map_s *utf8_targetmap;

// Char (selected codepage) to UTF-8.
//  dest   : output buffer, always zero terminated if buflen is not 0
//  src    : zero terminated text to convert
//  buflen : capacity of dest in bytes
// Does nothing if dest or src is NULL. The output is truncated when less than
// 4 bytes (longest sequence written here + terminator) are left in dest.
static void playlist_textconv_char_to_utf8(unsigned char *dest,unsigned char *src,unsigned int buflen)
{
 unsigned int index_out=0;

 if(!dest || !src)
  return;

 if(buflen<4){ // (buflen-4) would wrap around and disable the output limit
  if(buflen)
   dest[0]=0;
  return;
 }
 buflen-=4;

 if(!utf8_targetmap)
  utf8_targetmap=textconv_select_mapping();

 do{
  unsigned int c=*src++;
  unsigned int wc;

  if(!c) // end of the source text: never read behind the terminator
   break;

  // codes below 'begin' are their own Unicode value, the rest is in the map
  wc=(c<utf8_targetmap->begin)? c:utf8_targetmap->map[c-utf8_targetmap->begin];

  if(wc < (1<<7)){
   dest[0] = wc;
   dest+=1;
   index_out+=1;
  }
  else if(wc < (1<<11)){
   dest[0] = 0xc0 | (wc >> 6);
   dest[1] = 0x80 | (wc & 0x3f);
   dest+=2;
   index_out+=2;
  }
  else if(wc < (1<<16)){
   dest[0] = 0xe0 | (wc >> 12);
   dest[1] = 0x80 | ((wc >> 6) & 0x3f);
   dest[2] = 0x80 | (wc & 0x3f);
   dest+=3;
   index_out+=3;
  }
 }while(index_out<buflen);
 dest[0]=0;
}

// Codepage to codepage conversion (output direction) with
// textconv_table_cp_to_cp_OUT.
//  dest : output buffer; at most MAX_ID3LEN bytes are written (terminator
//         included), the result is always zero terminated
//  src  : zero terminated text to convert
// Does nothing if dest or src is NULL. If the table cannot be allocated the
// text is copied without conversion, so dest is never left undefined.
static void playlist_textconv_codepage_to_codepage_out(unsigned char *dest,unsigned char *src)
{
 const unsigned char *table;
 unsigned int left;

 if(!dest || !src)
  return;

 table=(playlist_textconv_init_codepage_OUT())? textconv_table_cp_to_cp_OUT:NULL;

 // test before converting: the terminator of an empty text must not be
 // translated (the table maps code 0 to a space)
 for(left=MAX_ID3LEN-1;left && *src;left--){
  *dest++=(table)? table[*src]:*src;
  src++;
 }
 *dest=0;
}

// Converts a text back with the output codepage conversion.
//  dest : buffer for the converted text
//  src  : text to convert
// Returns the text to use: src if there is no dest buffer or the codepage
// conversion is disabled, otherwise dest (an empty string if src is NULL).
char *mpxplay_playlist_textconv_back(char *dest,char *src)
{
 if(!dest)
  return src;

 if(!src){
  dest[0]=0;
  return dest;
 }

 if(!funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE))
  return src;

 playlist_textconv_codepage_to_codepage_out((unsigned char *)dest,(unsigned char *)src);
 return dest;
}

//------------------------------------------------------------------------

// Exported table giving access to the text conversion settings and functions
// of this file. mpxplay_textconv_func_s is declared elsewhere; the
// initializers are positional, so their order must follow its field order.
mpxplay_textconv_func_s mpxplay_playlist_textconv_funcs={
 &id3textconv, // conversion control bits (ID3TEXTCONV_...)
 0,            // NOTE(review): the meaning of this field is not visible in this file

 // input direction (all convert in place)
 &playlist_textconv_utf16_LE_to_char,
 &playlist_textconv_utf16_BE_to_char,
 &playlist_textconv_utf8_to_char,
 &playlist_textconv_codepage_to_codepage,
 &mpxplay_playlist_textconv_do,

 // output direction (src -> dest)
 &playlist_textconv_char_to_utf8,
 &playlist_textconv_codepage_to_codepage_out
};
