/*
 * Package: o3read
 * File:    utf8tolatin1.c
 */

/*
   Copyright (C) 2002  Ulric Eriksson <ulric@siag.nu>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA.
 */

#include <stdio.h>

/* Highest code point that ISO 8859-1 (Latin-1) can represent. */
#define LATIN1_MAX 0xffUL

/*
 * Classify c as the first octet of a UTF-8 sequence.
 *
 * Returns the total number of octets the sequence occupies (1..6) and
 * stores the payload bits carried by the lead octet in *bits.  Returns 0,
 * leaving *bits untouched, when c cannot start a sequence (a continuation
 * octet 0x80..0xbf, or 0xfe/0xff).
 */
static int utf8_lead(int c, unsigned long *bits)
{
      if ((c & 0x80) == 0x00) {           /* 0xxxxxxx: 1 octet */
            *bits = (unsigned long)(c & 0x7f);
            return 1;
      }
      if ((c & 0xe0) == 0xc0) {           /* 110xxxxx: 2 octets */
            *bits = (unsigned long)(c & 0x1f);
            return 2;
      }
      if ((c & 0xf0) == 0xe0) {           /* 1110xxxx: 3 octets */
            *bits = (unsigned long)(c & 0x0f);
            return 3;
      }
      if ((c & 0xf8) == 0xf0) {           /* 11110xxx: 4 octets */
            *bits = (unsigned long)(c & 0x07);
            return 4;
      }
      if ((c & 0xfc) == 0xf8) {           /* 111110xx: 5 octets */
            *bits = (unsigned long)(c & 0x03);
            return 5;
      }
      if ((c & 0xfe) == 0xfc) {           /* 1111110x: 6 octets */
            *bits = (unsigned long)(c & 0x01);
            return 6;
      }
      return 0;
}

/*
 * Smallest code point that genuinely needs `octets` octets (1..6).
 * A decoded value below this bound is an overlong encoding.
 */
static unsigned long utf8_min(int octets)
{
      static const unsigned long smallest[] = {
            0x0UL, 0x0UL, 0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL
      };

      return smallest[octets];
}

/*
 * Filter: read UTF-8 from stdin and write ISO 8859-1 (Latin-1) to stdout.
 *
 * Each well-formed sequence that decodes to U+0000..U+00FF is written as
 * the single byte with that value.  Everything else is dropped silently:
 *   - bytes that cannot start a sequence,
 *   - sequences cut short by a non-continuation byte or by end of input,
 *   - overlong encodings,
 *   - code points above U+00FF, which Latin-1 cannot represent.
 *
 * A byte that cuts a sequence short is not discarded with it; it is
 * re-examined as the start of a new sequence, so an ASCII character that
 * follows a damaged sequence still reaches the output.
 *
 * Returns 0 on success, 1 if reading stdin or writing stdout failed.
 */
int main(void)
{
      unsigned long ucs4 = 0;   /* code point being assembled */
      int octets = 0;           /* total length of the current sequence */
      int pending = 0;          /* continuation octets still expected */
      int c;

      while ((c = getchar()) != EOF) {
            if (pending > 0 && (c & 0xc0) != 0x80) {
                  /* Sequence cut short: forget it and treat c as a lead. */
                  pending = 0;
            }

            if (pending == 0) {
                  octets = utf8_lead(c, &ucs4);
                  if (octets == 0) {
                        continue;   /* not a valid lead octet: skip it */
                  }
                  pending = octets - 1;
            } else {
                  /* 10xxxxxx: append six more payload bits. */
                  ucs4 = (ucs4 << 6) | (unsigned long)(c & 0x3f);
                  pending--;
            }

            if (pending > 0) {
                  continue;         /* sequence not complete yet */
            }

            /*
             * The lead-octet mask already caps the value at the maximum
             * for its length, so only the lower (overlong) bound and the
             * Latin-1 ceiling need checking.
             */
            if (ucs4 < utf8_min(octets) || ucs4 > LATIN1_MAX) {
                  continue;
            }

            if (putchar((int)ucs4) == EOF) {
                  return 1;
            }
      }

      if (ferror(stdin) || fflush(stdout) == EOF) {
            return 1;
      }
      return 0;
}


/* Generated by Doxygen 1.6.0 */