/* convert() - converts machine-dependent character standards */

/* This is convert version 0.10PL0 - 930221  Written by Thomas Osterried, DL9SAU.
 * Files: iso.c, iso.h and iso.doc.
 *
 *      Copyright 1993 by Thomas Osterried, DL9SAU.
 *      Permission granted for non-commercial use and copying, provided
 *      that this notice is retained.
 *
 * Important notice:
 * -----------------
 *   - this convert tool can easily be implemented in other software
 *     with only including iso.h and compiling together with iso.c
 *   - before convert() is usable, charset_init() has to be called
 *   - convert() does not return dynamically allocated buffers; instead it
 *     works in a buffer of MAXBUF bytes (defined below), which must not be
 *     smaller than the input string
 *   - convert() cannot verify whether it exceeds the size of the input
 *     buffer. It is therefore strongly recommended that the input buffer
 *     is only filled up to half (to withstand the worst case).
 *   - If you'd like to establish new standards, don't forget to increase
 *     the CHARSETS definition in iso.h
 *
 * About compiling a standalone convert program:
 *   cc -DSTANDALONE -O -s -o convert iso.c
 *   ln -s convert TEXtoISO; ..
 *
 * Enjoy it!    - Thommy
 *   <dl9sau@dl9sau.bawue.dl.ampr.org>, DL9SAU @ DB0SAO.DEU.EU
 *   <dl9sau@zedat.fu-berlin.de>
 */
 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "convert.h"

/* Table mapping user-visible charset names to charset indices.
 * Several names may share one index.  The order of the entries matters:
 * lookups scan from the top, so get_charset_by_ind() reports the first name
 * listed for an index and get_charset_by_name() returns the first entry
 * that the given name is a prefix of.  A null name terminates the table.
 */
static struct charsets charsets[] = {
  {"iso-8859-1",  ISO},
  {"ansi",        ISO},
  {"8bit",        ISO},
  {"html",        HTML},
  {"dumb",        dumb},
  {"ascii",       dumb},
  {"none",        dumb},
  {"us",          dumb},
  {"tex",         TeX},
  {"ibm7bit",     IBM7},
  {"7bit",        IBM7},
  {"commodore",   IBM7},
  {"c64",         IBM7},
  {"digicom",     IBM7},
  {"roman8",      ROMAN8},
  {"ibmpc",       IBMPC},
  {"pc",          IBMPC},
  {"at",          IBMPC},
  {"xt",          IBMPC},
  {"atari",       ATARI},
  {"binary",      BIN},
  {"image",       BIN},
  {(char *) 0,    -1}   /* end-of-table sentinel */
};

/* Forward declarations of the per-charset table initializers,
 * one for every init_*() function defined in this file.
 */
static void init_iso __ARGS((void));
static void init_dumb __ARGS((void));
static void init_html __ARGS((void));
static void init_tex __ARGS((void));
static void init_ibm7 __ARGS((void));
static void init_roman8 __ARGS((void));
static void init_ibmpc __ARGS((void));
static void init_atari __ARGS((void));

/* Size in bytes of the work buffer used by convert(). */
#define MAXBUF  2048

/* Reduce a (possibly negative) char value to 0..255 so that it is a valid
 * argument for the <ctype.h> functions.
 */
#define  uchar(x)  ((x) & 0xff)
/* True if x is an upper-case letter.  Note: x is evaluated twice. */
#define isupperalphauchar(x)  (isalpha(uchar(x)) && isupper(uchar(x)))

#if defined(mips)
extern char *strdup __ARGS((char *s));
#endif

/* Store a newly allocated one-character string holding character code y at
 * position x of the table `cpp`.  Expects `char **cpp` and `char buf[2]` to
 * exist in the calling scope.  The do/while(0) wrapper makes "CP(a, b);"
 * exactly one statement, so the macro is also safe inside if/else.
 */
#define  CP(x, y) do {       \
  sprintf(buf, "%c", (y));   \
  cpp[(x)] = strdup(buf);    \
} while (0)

/* Conversion tables: charset[set][pos] is the string that represents
 * character position `pos` in charset `set`, or a null pointer (the static
 * default) when that charset has no representation for the position.
 * init_iso() stores character code 160+pos at position pos, so the
 * positions follow the ISO codes 160..255.
 */
static char  *charset[CHARSETS][CHARS];

/* Fill the ISO table: position n (0..95) receives a freshly allocated
 * one-character string holding character code 160+n.  Only the first
 * call does any work.
 */
static void init_iso()
{
  static int done = 0;
  char one[2];
  int code;

  if (done)
    return;
  done = 1;

  for (code = 160; code < 160 + 96; code++) {
    sprintf(one, "%c", code);
    charset[ISO][code - 160] = strdup(one);
  }
}

/*---------------------------------------------------------------------------*/

/* Fill the dumb (plain ASCII) table: the seven positions below are
 * replaced by two-letter spellings.  Only the first call does any work.
 */
static void init_dumb()
{
  static int done = 0;
  static int where[] = { 36,   54,   60,   63,   68,   86,   92 };
  static char *what[] = { "Ae", "Oe", "Ue", "ss", "ae", "oe", "ue" };
  unsigned k;

  if (done)
    return;
  done = 1;

  for (k = 0; k < sizeof(where) / sizeof(where[0]); k++)
    charset[dumb][where[k]] = what[k];
}

/*---------------------------------------------------------------------------*/

/* Fill the HTML table: the seven positions below are written as named
 * character entities.  Only the first call does any work.
 */
static void init_html()
{
  static int done = 0;
  static int where[] = { 36, 54, 60, 63, 68, 86, 92 };
  static char *what[] = {
    "&Auml;", "&Ouml;", "&Uuml;", "&szlig;", "&auml;", "&ouml;", "&uuml;"
  };
  unsigned k;

  if (done)
    return;
  done = 1;

  for (k = 0; k < sizeof(where) / sizeof(where[0]); k++)
    charset[HTML][where[k]] = what[k];
}

/*---------------------------------------------------------------------------*/

/* Fill the TeX table: the seven positions below are written as a double
 * quote followed by a base letter.  Only the first call does any work.
 */
static void init_tex()
{
  static int done = 0;
  static int where[] = { 36, 54, 60, 63, 68, 86, 92 };
  static char *what[] = {
    "\"A", "\"O", "\"U", "\"s", "\"a", "\"o", "\"u"
  };
  unsigned k;

  if (done)
    return;
  done = 1;

  for (k = 0; k < sizeof(where) / sizeof(where[0]); k++)
    charset[TeX][where[k]] = what[k];
}

/*---------------------------------------------------------------------------*/

/* Fill the IBM7 (7-bit) table: the seven positions below are mapped onto
 * single ASCII punctuation characters.  Only the first call does any work.
 */
static void init_ibm7()
{
  static int done = 0;
  static int where[] = { 36,  54,   60,  63,  68,  86,  92 };
  static char *what[] = { "[", "\\", "]", "~", "{", "|", "}" };
  unsigned k;

  if (done)
    return;
  done = 1;

  for (k = 0; k < sizeof(where) / sizeof(where[0]); k++)
    charset[IBM7][where[k]] = what[k];
}

/*---------------------------------------------------------------------------*/

/* Fill the ROMAN8 table.  Each CP(pos, code) stores a newly allocated
 * one-character string holding `code` at table position `pos`; positions
 * not listed stay null, i.e. have no ROMAN8 representation here.
 * Only the first call does any work.
 */
static void init_roman8()
{
  char **cpp;   /* table being filled; used implicitly by CP() */
  char buf[2];  /* scratch for one character plus NUL; used by CP() */

  static int twice = 0;

  if (twice) return;
  twice = 1;

  cpp = charset[ROMAN8];

  CP(0, 255);  CP(1, 184);  CP(2, 191);  CP(3, 187);  CP(4, 186);
  CP(5, 188);  CP(7, 189);  CP(8, 171);  CP(10, 249); CP(11, 251);
  CP(13, 246); CP(15, 176); CP(16, 179); CP(17, 254); CP(20, 168);
  CP(26, 250); CP(27, 253); CP(28, 247); CP(29, 248); CP(31, 185);
  CP(32, 161); CP(33, 224); CP(34, 162); CP(35, 225); CP(36, 216);
  CP(37, 208); CP(38, 211); CP(39, 180); CP(40, 163); CP(41, 220);
  CP(42, 164); CP(43, 165); CP(44, 230); CP(45, 229); CP(46, 166);
  CP(47, 167); CP(48, 227); CP(49, 182); CP(50, 232); CP(51, 231);
  CP(52, 223); CP(53, 233); CP(54, 218); CP(56, 210); CP(57, 173);
  CP(58, 237); CP(59, 174); CP(60, 219); CP(62, 240); CP(63, 222);
  CP(64, 200); CP(65, 196); CP(66, 192); CP(67, 226); CP(68, 204);
  CP(69, 212); CP(70, 215); CP(71, 181); CP(72, 201); CP(73, 197);
  CP(74, 193); CP(75, 205); CP(76, 217); CP(77, 213); CP(78, 209);
  CP(79, 221); CP(80, 228); CP(81, 183); CP(82, 202); CP(83, 198);
  CP(84, 194); CP(85, 234); CP(86, 206); CP(88, 214); CP(89, 203);
  CP(90, 199); CP(91, 195); CP(92, 207); CP(94, 241); CP(95, 239);

  /* Position 96 lies beyond the 96 positions (0..95) that init_iso() fills.
   * NOTE(review): this write is only in bounds if CHARS > 96 - confirm
   * against the definition of CHARS in convert.h.
   */
  CP(96, 252);
}

/*---------------------------------------------------------------------------*/

/* Fill the IBMPC table.  Each CP(pos, code) stores a newly allocated
 * one-character string holding `code` at table position `pos`; positions
 * not listed stay null, i.e. have no IBMPC representation here.
 * Only the first call does any work; init_atari() relies on that when it
 * calls this function itself.
 */
static void init_ibmpc()
{
  char **cpp;   /* table being filled; used implicitly by CP() */
  char buf[2];  /* scratch for one character plus NUL; used by CP() */

  static int twice = 0;

  if (twice) return;
  twice = 1;

  cpp = charset[IBMPC];

  CP(0, 255);  CP(1, 173);  CP(2, 155);  CP(3, 156);  CP(5, 157);
  CP(10, 166); CP(11, 174); CP(12, 170); CP(13, 196); CP(16, 248);
  CP(17, 241); CP(18, 253); CP(21, 230); CP(23, 249); CP(26, 167);
  CP(27, 175); CP(28, 172); CP(29, 171); CP(31, 168); CP(36, 142);
  CP(37, 143); CP(38, 146); CP(39, 128); CP(40, 144); CP(49, 165);
  CP(54, 153); CP(60, 154); CP(63, 225); CP(64, 133); CP(65, 160);
  CP(66, 131); CP(68, 132); CP(69, 134); CP(70, 145); CP(71, 135);
  CP(72, 138); CP(73, 130); CP(74, 136); CP(75, 137); CP(76, 141);
  CP(77, 161); CP(78, 140); CP(79, 139); CP(80, 229); CP(81, 164);
  CP(82, 149); CP(83, 162); CP(84, 147); CP(86, 148); CP(87, 246);
  CP(88, 237); CP(89, 151); CP(90, 163); CP(91, 150); CP(92, 129);
  CP(95, 152);

  /* Position 96 lies beyond the 96 positions (0..95) that init_iso() fills.
   * NOTE(review): this write is only in bounds if CHARS > 96 - confirm
   * against the definition of CHARS in convert.h.
   */
  CP(96, 254);
}

/*---------------------------------------------------------------------------*/

/* Build the ATARI table as a copy of the IBMPC table with one difference:
 * position 63 gets character code 0x9e.  The copied entries point to the
 * same strings as the IBMPC table.  Only the first call does any work.
 */
static void init_atari()
{
  static int done = 0;
  char one[2];

  if (done)
    return;
  done = 1;

  /* The IBMPC data must exist before it is copied; init_ibmpc() itself
   * makes sure that it only runs once.
   */
  init_ibmpc();

  memcpy(charset[ATARI], charset[IBMPC], sizeof(charset[ATARI]));

  /* the single exception */
  sprintf(one, "%c", 0x9e);
  charset[ATARI][63] = strdup(one);
}

/*---------------------------------------------------------------------------*/

void charset_init()
{
  static int twice = 0;

  if (twice) return;
  twice = 1;

  init_iso();
  init_dumb();
  init_tex();
  init_ibm7();
  init_roman8();
  init_ibmpc();
  init_atari();
  init_html();

}

/*---------------------------------------------------------------------------*/

/* Look up a charset index by name.
 *   buf - charset name; may be an abbreviation, because only the first
 *         strlen(buf) characters of each table name are compared
 *         (case-sensitively)
 * Returns ISO for an empty name, otherwise the index of the first table
 * entry that starts with buf, or -1 if there is none.
 */
int get_charset_by_name(buf)
char *buf;
{
  struct charsets *entry;
  int n;

  n = strlen(buf);
  if (n == 0)
    return ISO;

  entry = charsets;
  while (entry->name) {
    if (strncmp(entry->name, buf, n) == 0)
      return entry->ind;
    entry++;
  }

  return -1;
}

/*---------------------------------------------------------------------------*/

/* Look up a charset name by index.
 *   ind - charset index
 * Returns the first name the table lists for ind (a pointer into the
 * table, not a copy), or a null pointer if no entry has that index.
 */
char *get_charset_by_ind(ind)
int ind;
{
  struct charsets *entry = charsets;

  while (entry->name && entry->ind != ind)
    entry++;

  /* at the end of the table entry->name is the null sentinel */
  return entry->name;
}

/*---------------------------------------------------------------------------*/

/* Return a printable list of all charset names: one line per charset
 * index 0..CHARSETS-1 with the names for that index separated by ", ",
 * followed by one line for BIN.  The text is built on the first call and
 * kept in an allocated copy that later calls return again.
 */
char *list_charsets()
{
  static char *cached = (char *) 0;

  char text[2048];
  struct charsets *entry;
  int pass, wanted, names;

  if (cached)
    return cached;

  text[0] = '\0';

  /* The extra pass (pass == CHARSETS) handles BIN.
   * No good solution - but BIN is not part of CHARSETS.
   */
  for (pass = 0; pass <= CHARSETS; pass++) {
    wanted = (pass < CHARSETS) ? pass : BIN;
    names = 0;
    for (entry = charsets; entry->name; entry++) {
      if (entry->ind != wanted)
        continue;
      if (names++)
        strcat(text, ", ");
      strcat(text, entry->name);
    }
    strcat(text, "\n");
  }

  cached = strdup(text);
  return cached;
}

/*---------------------------------------------------------------------------*/

/* Convert a string in place from one charset to another.
 *   in     - index of the charset `string` is written in
 *   out    - index of the charset to convert to
 *   string - NUL-terminated text; overwritten with the converted text
 * Returns string.  The text is left untouched if in is BIN or dumb, if
 * in equals out, or if either index is outside 0..CHARSETS-1.
 *
 * charset_init() must have been called before.  The converted text may be
 * longer than the input; the caller's buffer has to be large enough for it
 * (this function cannot check that).  The result is built in an internal
 * buffer and is cut off after MAXBUF-1 bytes.
 */
char *convert(in,out,string)
int in;
int out;
char *string;
{

  char buf[MAXBUF];
  register char *p, *q, *curr_set;
  char *limit;		/* last byte of buf, reserved for the NUL */
  size_t inlen, outlen;

  int pos;

  if (in == BIN || in == dumb || in == out) return string;
  if (in < 0 || out < 0 || in > CHARSETS-1 || out > CHARSETS-1) return string;

  limit = buf + MAXBUF - 1;

  for (p = string, q = buf; *p; p++) {

    /* A backslash followed by a double quote in TeX input is skipped:
     * both characters are consumed and nothing is written.
     */
    if (in == TeX && !strncmp(p, "\\\"", 2)) {
      p++;
      continue;
    }

    /* Every TeX sequence starts with a double quote, so for TeX input the
     * table is searched only at a quote.
     */
    if (!(in == TeX && *p != '\"')) {
      /* Look for the table position whose input form starts at p */
      inlen = 0;
      for (pos = 0; pos < CHARS; pos++) {
        curr_set = charset[in][pos];
        if (curr_set && !strncmp(p, curr_set, (inlen = strlen(curr_set)))) {
          curr_set = charset[out][pos];
          break;
        }
      }
      /* Found, and the output charset has a form for it? */
      if (pos < CHARS && curr_set) {
        outlen = strlen(curr_set);
        if (outlen > (size_t) (limit - q))
          break;		/* no room left in buf: cut the result off here */

        /* Consume the whole matched input sequence, not just its first
         * byte (the loop header skips the last one).  This matters for
         * multi-byte forms such as TeX sequences and HTML entities.
         */
        if (inlen > 1) p += inlen - 1;

        while (*curr_set)
          *q++ = *curr_set++;

        /* dumb output: upper-case the last letter of the replacement
         * ("Ae" -> "AE") if the next input character is an upper-case
         * letter, or if the next input character is not alphanumeric and
         * the replacement either starts the output or begins with an
         * upper-case letter that follows another upper-case letter.
         */
        if (out == dumb &&
            (isupperalphauchar(p[1]) ||
             ((q-2 == buf ||
               (q-2 > buf && isupperalphauchar(*(q-2)) && isupperalphauchar(*(q-3)))) &&
              !isalnum(uchar(p[1])))))
          *(q-1) = toupper(uchar(*(curr_set-1)));
        continue;
      }
    }

    /* No conversion applies: copy the character unchanged */
    if (q >= limit)
      break;			/* buf is full: cut the result off here */
    *q++ = *p;

  }

  *q = '\0';
  strcpy(string, buf);

  return string;
}

/*---------------------------------------------------------------------------*/

#ifdef	STANDALONE

/* Forward declarations for the standalone program */
static void pexit __ARGS((char *mesg));
static void doconvert __ARGS((FILE *fp));

/* Usage line printed by pexit() when the charsets are invalid */
#define USAGE "usage: convert in out [filename]  or  ${in}to${out} [filename]"

/* Lower-cased program name without directory part, set up by main().
 * main() cuts it off at the first "to" when parsing ${in}to${out}.
 */
static char *myname;
/* Charset indices of input and output, passed to convert() by doconvert() */
static int in, out;

/*---------------------------------------------------------------------------*/

/* Print an error message and terminate the program with exit status 1.
 *   mesg - prefix handed to perror(), or a null pointer to print the
 *          "invalid charset" message with the usage line and the list of
 *          known charsets instead
 */
static void pexit(mesg)
char  *mesg;
{
  if (!mesg)
    fprintf(stderr, "%s: invalid charset\n%s\n\nin/out charsets are:\n%s\n", myname, USAGE, list_charsets());
  else
    perror(mesg);

  exit(1);
}

/*---------------------------------------------------------------------------*/

/* Convert everything readable from fp from charset `in` to charset `out`
 * and write the result to stdout.  Terminates the program via pexit() on
 * a read error.
 */
static void doconvert(FILE *fp)
{
  /* convert() expands the text in place, so the buffer must hold the
   * converted chunk, not just the input.  The longest replacement in the
   * tables is 7 bytes ("&szlig;") for one input byte; reading at most
   * MAXBUF/8 - 1 bytes at a time therefore keeps the result below MAXBUF.
   * Lines longer than that are converted in several chunks.
   */
  char buf[MAXBUF];

  while (fgets(buf, MAXBUF / 8, fp))
    fputs(convert(in, out, buf), stdout);

  if (ferror(fp))
    pexit("read");
}


/*---------------------------------------------------------------------------*/

/* Standalone converter.
 * The charsets are taken from the program name if it has the form
 * ${in}to${out} (e.g. via a symlink), otherwise from the first two
 * arguments.  All remaining arguments are files to convert; without any,
 * stdin is converted.  Output goes to stdout.
 * Returns 0 on success; errors terminate the program via pexit().
 */
int main(int argc, char *argv[])
{

  FILE * fp;

  int ind;
  char *p;
  char *prog;

  ind = 1;
  in = out = -1;

  /* argv[0] may be missing in unusual environments */
  prog = (argc > 0 && argv[0]) ? argv[0] : "convert";

  /* Strip the directory part, unless nothing follows the last separator */
  p = strrchr(prog, '/');
#ifdef	__TURBOC__
  if (!p)
    p = strrchr(prog, '\\');
#endif
  if (p && *++p)
    myname = strdup(p);
  else
    myname = strdup(prog);
  if (!myname)
    pexit("strdup");

#ifdef	__TURBOC__
  myname = strlwr(myname);
#else
  for (p = myname; *p; p++)
    *p = tolower(uchar(*p));
#endif

  /* Program name of the form ${in}to${out}?  Splitting cuts myname off
   * at the "to".
   */
  if ((p = strstr(myname, "to")) && p[1]) {
    *p = '\0';
    p = &p[2];
    out = get_charset_by_name(p);
    in = get_charset_by_name(myname);
  }
  /* Otherwise take the charsets from the command line - but only if both
   * arguments are really there.
   */
  if ((in < 0 || out < 0) && argc >= 3) {
      in = get_charset_by_name(argv[1]);
      out = get_charset_by_name(argv[2]);
      ind = 3;
  }
  if (in < 0 || out < 0) pexit(0);

  charset_init();

  if (ind < argc) {

    while (ind < argc) {
      p = argv[ind++];
      if (!(fp = fopen(p, "r")))
        pexit(p);
      doconvert(fp);
      fclose(fp);
    }

  } else doconvert(stdin);

  return 0;
}
#endif
