/*
 * charsets.c
 * Source code listing from the tucnak2 package.
 */

/*
    Tucnak - VHF contest log
    Copyright (C) 2002-2006  Ladislav Vaiz <ok1zia@nagano.cz>
    and authors of web browser Links 0.96

    This program is free software; you can redistribute it and/or                                                        
    modify it under the terms of the GNU General Public License                                                          
    version 2 as published by the Free Software Foundation.

*/

#include "header.h"

00014 struct table_entry {
    char c;
    unicode_val u;
};

/*
 * Description of one codepage.
 *
 *   name    - name reported by get_cp_name(); a NULL name terminates the
 *             codepages[] array when get_cp_index() scans it.
 *   aliases - NULL-terminated array of alias strings that get_cp_index()
 *             matches case-insensitively.
 *   table   - mapping rows terminated by a row whose c is zero; compared
 *             against table_utf_8 to recognise the UTF-8 codepage.
 */
struct codepage_desc {
    char *name;
    char **aliases;
    struct table_entry *table;
};

#include "codepage.inc"
#include "uni_7b.inc"

/*
 * Table of one-byte strings indexed by byte value: apart from the entries
 * noted below, strings[i] is the string consisting of the single byte i
 * (strings[0] is therefore the empty string).  Used wherever a single
 * character has to be returned as a char *.
 *
 * NOTE(review): the entries at indices 027 and 037 hold "\033" instead of
 * "\027" and "\037".  It is not clear from this file whether that is
 * intentional - confirm before changing.
 */
char *strings[256] = {
    "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
    "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
    "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\033",
    "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\033",
    "\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047",
    "\050", "\051", "\052", "\053", "\054", "\055", "\056", "\057",
    "\060", "\061", "\062", "\063", "\064", "\065", "\066", "\067",
    "\070", "\071", "\072", "\073", "\074", "\075", "\076", "\077",
    "\100", "\101", "\102", "\103", "\104", "\105", "\106", "\107",
    "\110", "\111", "\112", "\113", "\114", "\115", "\116", "\117",
    "\120", "\121", "\122", "\123", "\124", "\125", "\126", "\127",
    "\130", "\131", "\132", "\133", "\134", "\135", "\136", "\137",
    "\140", "\141", "\142", "\143", "\144", "\145", "\146", "\147",
    "\150", "\151", "\152", "\153", "\154", "\155", "\156", "\157",
    "\160", "\161", "\162", "\163", "\164", "\165", "\166", "\167",
    "\170", "\171", "\172", "\173", "\174", "\175", "\176", "\177",
    "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
    "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
    "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
    "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
    "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
    "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
    "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
    "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
    "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
    "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
    "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
    "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
    "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
    "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
    "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
    "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377",
};

void free_translation_table(struct conv_table *p)
{
    int i;
    for (i = 0; i < 256; i++) if (p[i].t) free_translation_table(p[i].u.tbl);
    mem_free(p);
}

/* Placeholder string "*", allocated on demand by new_translation_table()
   and released by free_conv_table(); NULL while it is not allocated. */
char *no_str = NULL;

void new_translation_table(struct conv_table *p)
{
    int i;
    if (!no_str) no_str = stracpy("*");
    for (i = 0; i < 256; i++) if (p[i].t) free_translation_table(p[i].u.tbl);
    for (i = 0; i < 128; i++) p[i].t = 0, p[i].u.str = strings[i];
    for (; i < 256; i++) p[i].t = 0, p[i].u.str = no_str;
}

/*
 * BIN_SEARCH(table, entry, entries, key, result)
 *
 * Binary search over table[0 .. entries-1], which must be sorted in
 * ascending order of the member `entry`.  On completion `result` holds the
 * index of an element whose `entry` equals `key`, or -1 when there is none
 * (including when entries <= 0).  `result` is written exactly once, at the
 * end.
 *
 * `table` and `key` are evaluated several times, so pass expressions
 * without side effects.  The expansion is a single statement (do/while(0));
 * terminate the invocation with a semicolon.
 */
#define BIN_SEARCH(table, entry, entries, key, result)                  \
do {                                                                    \
    int bs_lo_ = 0, bs_hi_ = (entries) - 1, bs_hit_ = -1;               \
    while (bs_lo_ <= bs_hi_) {                                          \
        int bs_mid_ = bs_lo_ + (bs_hi_ - bs_lo_) / 2;                   \
        if ((table)[bs_mid_].entry == (key)) {                          \
            bs_hit_ = bs_mid_;                                          \
            break;                                                      \
        }                                                               \
        if ((table)[bs_mid_].entry > (key)) bs_hi_ = bs_mid_ - 1;       \
        else bs_lo_ = bs_mid_ + 1;                                      \
    }                                                                   \
    (result) = bs_hit_;                                                 \
} while (0)

/*
 * Replacement code points for Unicode values 0x80..0x9f, indexed by
 * (u - 0x80); consulted by u2cp().  A zero entry means there is no
 * replacement, in which case u2cp() returns NULL.
 *
 * NOTE(review): the non-zero values look like the Windows-1252 meanings of
 * these bytes, with some replaced by ASCII look-alikes - confirm.
 */
static const unicode_val strange_chars[32] = {
0x20ac, 0x0000, 0x002a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021,
0x005e, 0x2030, 0x0160, 0x003c, 0x0152, 0x0000, 0x0000, 0x0000,
0x0000, 0x0060, 0x0027, 0x0022, 0x0022, 0x002a, 0x2013, 0x2014,
0x007e, 0x2122, 0x0161, 0x003e, 0x0153, 0x0000, 0x0000, 0x0000,
};

/*
 * Translate the Unicode value u into a string in codepage `to`.
 *
 * Lookup order:
 *   1. u < 128 maps to the matching strings[] entry.
 *   2. 0xa0 maps to "\001" and 0xad to the empty string.
 *      NOTE(review): "\001" is presumably an internal marker for a
 *      non-breaking space - confirm against the callers.
 *   3. 0x80..0x9f are first replaced through strange_chars[] and the
 *      lookup restarts with the replacement (NULL if there is none).
 *   4. The table of codepage `to` is scanned for a row with this Unicode
 *      value; its byte is returned as a one-character string.
 *   5. Finally the unicode_7b[] table is binary-searched.
 *
 * `to` must be a valid index into codepages[]; it is not range-checked.
 *
 * Returns a pointer to shared storage (do not modify or free it), or NULL
 * when u has no representation.
 */
char *u2cp(unicode_val u, int to)
{
    int j, s;
    if (u < 128) return strings[u];
    if (u == 0xa0) return "\001";
    if (u == 0xad) return "";
    if (u < 0xa0) {
        if (!strange_chars[u - 0x80]) return NULL;
        return u2cp(strange_chars[u - 0x80], to);
    }
    for (j = 0; codepages[to].table[j].c; j++)
        if (codepages[to].table[j].u == u)
            /* The table byte is a plain char: go through unsigned char so
               that bytes >= 0x80 index strings[] at 128..255 even where
               char is signed (a direct cast to unsigned int would sign-
               extend first and index far outside the array). */
            return strings[(unsigned char)codepages[to].table[j].c];
    BIN_SEARCH(unicode_7b, x, N_UNICODE_7B, u, s);
    if (s != -1) return unicode_7b[s].s;
    return NULL;
}

/* Scratch buffer returned by encode_utf_8(): room for up to six encoded
   bytes plus the terminating NUL. */
char utf_buffer[7];

/*
 * Encode the value u as a NUL-terminated UTF-8 style byte sequence of one
 * to six bytes, chosen by magnitude (thresholds 0x80, 0x800, 0x10000,
 * 0x200000, 0x4000000).
 *
 * The result is written into the file-level utf_buffer and a pointer to it
 * is returned, so every call overwrites the previous result; copy the
 * string if it has to outlive the next call.
 */
char *encode_utf_8(unicode_val u)
{
    char *out = utf_buffer;

    memset(utf_buffer, 0, sizeof(utf_buffer));

    if (u < 0x80) {
        out[0] = u;
    } else if (u < 0x800) {
        out[0] = 0xc0 | ((u >> 6) & 0x1f);
        out[1] = 0x80 | (u & 0x3f);
    } else if (u < 0x10000) {
        out[0] = 0xe0 | ((u >> 12) & 0x0f);
        out[1] = 0x80 | ((u >> 6) & 0x3f);
        out[2] = 0x80 | (u & 0x3f);
    } else if (u < 0x200000) {
        out[0] = 0xf0 | ((u >> 18) & 0x0f);
        out[1] = 0x80 | ((u >> 12) & 0x3f);
        out[2] = 0x80 | ((u >> 6) & 0x3f);
        out[3] = 0x80 | (u & 0x3f);
    } else if (u < 0x4000000) {
        out[0] = 0xf8 | ((u >> 24) & 0x0f);
        out[1] = 0x80 | ((u >> 18) & 0x3f);
        out[2] = 0x80 | ((u >> 12) & 0x3f);
        out[3] = 0x80 | ((u >> 6) & 0x3f);
        out[4] = 0x80 | (u & 0x3f);
    } else {
        out[0] = 0xfc | ((u >> 30) & 0x01);
        out[1] = 0x80 | ((u >> 24) & 0x3f);
        out[2] = 0x80 | ((u >> 18) & 0x3f);
        out[3] = 0x80 | ((u >> 12) & 0x3f);
        out[4] = 0x80 | ((u >> 6) & 0x3f);
        out[5] = 0x80 | (u & 0x3f);
    }

    return utf_buffer;
}

/*
 * Convert the single byte c of codepage `from` to UTF-8.
 * This slow (linear scan) code is used by the terminal utf_8_io.
 *
 * c is treated as a byte value 0..255.  A value in -128..-1 is taken to be
 * a sign-extended char and is mapped back to 128..255; any other value
 * outside 0..255 yields the encoding of UCS_NO_CHAR.
 *
 * When `from` is the UTF-8 codepage, or c is below 128, the byte is
 * returned unchanged as a one-character string from strings[].  Otherwise
 * the codepage table is searched for the byte and its Unicode value is
 * encoded with encode_utf_8(); bytes missing from the table are encoded
 * as UCS_NO_CHAR.
 *
 * `from` must be a valid index into codepages[].  The returned pointer
 * refers to shared storage (strings[] or the encode_utf_8() buffer) and
 * must not be freed.
 */
char *cp2utf_8(int from, int c)
{
    int j;

    /* Caller passed a (signed) char: recover the byte value. */
    if (c < 0 && c >= -128) c = (unsigned char)c;
    /* Anything else outside a byte cannot index strings[]. */
    if (c < 0 || c > 255) return encode_utf_8(UCS_NO_CHAR);

    if (codepages[from].table == table_utf_8 || c < 128)
        return strings[c];

    for (j = 0; codepages[from].table[j].c; j++)
        /* The table byte is a plain char; compare it as unsigned char so
           that bytes >= 0x80 match even where char is signed. */
        if ((unsigned char)codepages[from].table[j].c == c)
            return encode_utf_8(codepages[from].table[j].u);

    return encode_utf_8(UCS_NO_CHAR);
}

/*
 * Disabled (#if 0) - not compiled.
 *
 * add_utf_8() registers the UTF-8 encoding of u in the conversion-table
 * tree rooted at ct so that the byte sequence maps to str.  Each leading
 * byte selects (creating it on demand) a nested 256-entry table; the last
 * byte's entry receives str unless that entry is already taken.
 *
 * NOTE(review): ct[*p] indexes with a plain char.  If this code is ever
 * re-enabled, bytes >= 0x80 would produce negative indices where char is
 * signed; convert through unsigned char first.
 */
#if 0
void add_utf_8(struct conv_table *ct, unicode_val u, char *str)
{
    char *p = encode_utf_8(u);
    while (p[1]) {
        if (ct[*p].t) ct = ct[*p].u.tbl;
        else {
            struct conv_table *nct;
            if (ct[*p].u.str != no_str) {
                internal("bad utf encoding #1");
                return;
            }
            if (!(nct = mem_alloc(sizeof(struct conv_table) * 256))) return;
            memset(nct, 0, sizeof(struct conv_table) * 256);
            new_translation_table(nct);
            ct[*p].t = 1;
            ct[*p].u.tbl = nct;
            ct = nct;
        }
        p++;
    }
    if (ct[*p].t) {
        internal("bad utf encoding #2");
        return;
    }
    if (ct[*p].u.str == no_str) ct[*p].u.str = str;
}
#endif

/* Conversion table towards UTF-8.  Entries 128..255 hold strings that
   free_utf_table() releases with mem_free(). */
struct conv_table utf_table[256];
/* Non-zero while utf_table has not been set up; free_conv_table() only
   calls free_utf_table() when this is zero. */
int utf_table_init = 1;

/*
 * Release the strings stored in the upper half (entries 128..255) of
 * utf_table.  The lower half is not touched.  The freed pointers are left
 * in place, so the caller must not use or free them again.
 */
static void free_utf_table(void)
{
    int slot = 128;

    while (slot < 256) {
        mem_free(utf_table[slot].u.str);
        slot++;
    }
}

/*
 * Disabled (#if 0) - not compiled.
 *
 * get_translation_table_to_utf_8() fills utf_table with the UTF-8 encoding
 * of every byte of codepage `from` and returns it (NULL when from == -1).
 * The result is cached: asking again for the same codepage returns the
 * table unchanged.  Entries 0..127 point into strings[]; entries 128..255
 * are stracpy() copies that free_utf_table() releases before the table is
 * rebuilt.  Bytes missing from the codepage table receive a copy of no_str.
 *
 * NOTE(review): utf_table[codepages[from].table[i].c] indexes with a plain
 * char; if re-enabled, convert through unsigned char first.
 */
#if 0
static struct conv_table *get_translation_table_to_utf_8(int from)
{
    register int i;
    static int lfr = -1;

    if (from == -1) return NULL;
    if (from == lfr) return utf_table;
    if (utf_table_init)
        memset(utf_table, 0, sizeof(struct conv_table) * 256),
        utf_table_init = 0;
    else
        free_utf_table();

    for (i = 0; i < 128; i++)
        utf_table[i].u.str = strings[i];

    if (codepages[from].table == table_utf_8) {
        for (i = 128; i < 256; i++)
            utf_table[i].u.str = stracpy(strings[i]);
        return utf_table;
    }

    for (i = 128; i < 256; i++)
        utf_table[i].u.str = NULL;

    for (i = 0; codepages[from].table[i].c; i++) {
        unicode_val u = codepages[from].table[i].u;

        if (!utf_table[codepages[from].table[i].c].u.str)
            utf_table[codepages[from].table[i].c].u.str =
                stracpy(encode_utf_8(u));
    }

    for (i = 128; i < 256; i++)
        if (!utf_table[i].u.str)
            utf_table[i].u.str = stracpy(no_str);

    return utf_table;
}
#endif

/* Conversion table between two codepages; free_conv_table() resets it
   through new_translation_table(). */
struct conv_table table[256];
/* Non-zero until table has been zero-filled for the first time. */
static int first = 1;

/*
 * Release everything owned by the conversion tables in this file.
 *
 *  - If utf_table has been set up, its allocated strings are freed and the
 *    table is marked as not set up again, so that calling this function
 *    twice cannot free the same strings a second time.
 *  - table is zero-filled on first use and then reset with
 *    new_translation_table(), which releases any nested tables.
 *  - The shared placeholder no_str is freed and cleared.
 *
 * After this call entries 128..255 of table still point at the freed
 * placeholder; they become valid again only after the next
 * new_translation_table(table).
 */
void free_conv_table(void)
{
    if (!utf_table_init) {
        free_utf_table();
        utf_table_init = 1;
    }
    if (first) memset(table, 0, sizeof(struct conv_table) * 256), first = 0;
    new_translation_table(table);
    mem_free(no_str), no_str = NULL;
}

/*
 * Disabled (#if 0) - not compiled.
 *
 * get_translation_table() returns the table converting text from codepage
 * `from` to codepage `to`, or NULL if either index is -1.  Conversion to
 * UTF-8 is delegated to get_translation_table_to_utf_8(); otherwise the
 * shared table is rebuilt (and cached for the last from/to pair): from
 * UTF-8 by registering every target byte and every unicode_7b entry
 * >= 0x80 via add_utf_8(), and from a single-byte codepage by translating
 * each byte 128..255 through u2cp().
 *
 * xxstrcmp() compares the NUL-terminated s1 against the first l2 bytes of
 * s2, returning 1, -1 or, when those l2 bytes match, whether s1 continues.
 *
 * NOTE(review): codepages[...].table[j].c is a plain char; the comparison
 * with i (128..255) and its use as a strings[] index both need an
 * unsigned char conversion if this code is re-enabled.
 */
#if 0
struct conv_table *get_translation_table(int from, int to)
{
    int i;
    static int lfr = -1;
    static int lto = -1;
    if (first) memset(table, 0, sizeof(struct conv_table) * 256), first = 0;
    if (/*from == to ||*/ from == -1 || to == -1) return NULL;
    if (codepages[to].table == table_utf_8)
        return get_translation_table_to_utf_8(from);
    if (from == lfr && to == lto) return table;
    lfr = from; lto = to;
    new_translation_table(table);
    if (codepages[from].table == table_utf_8) {
        int j;
        for (j = 0; codepages[to].table[j].c; j++) add_utf_8(table, codepages[to].table[j].u, strings[codepages[to].table[j].c]);
        for (i = 0; unicode_7b[i].x != -1; i++) if (unicode_7b[i].x >= 0x80) add_utf_8(table, unicode_7b[i].x, unicode_7b[i].s);
    } else for (i = 128; i < 256; i++) {
        int j;
        char *u;
        for (j = 0; codepages[from].table[j].c; j++) {
            if (codepages[from].table[j].c == i) goto f;
        }
        continue;
        f:
        u = u2cp(codepages[from].table[j].u, to);
        if (u) table[i].u.str = u;
    }
    return table;
}

static inline int xxstrcmp(char *s1, char *s2, int l2)
{
    while (l2) {
        if (*s1 > *s2) return 1;
        if (!*s1 || *s1 < *s2) return -1;
        s1++, s2++, l2--;
    }
    return !!*s1;
}
#endif




/*
 * Find the codepage whose alias occurs in the string n.
 *
 * Every alias of every entry in codepages[] is searched for as a
 * case-insensitive substring of n (upcase() is applied to both sides).
 * Among all aliases found, the longest one wins; on equal length the
 * first one encountered is kept.
 *
 * Returns the index into codepages[], or -1 when no alias occurs in n.
 */
int get_cp_index(char *n)
{
    int best_cp = -1;
    int best_len = 0;
    int cp;

    for (cp = 0; codepages[cp].name; cp++) {
        char **alias_p;

        for (alias_p = codepages[cp].aliases; *alias_p; alias_p++) {
            char *alias = *alias_p;
            char *start;

            for (start = n; *start; start++) {
                int k = 0;

                /* Count how many leading alias characters match the text
                   at this position; the NUL ending n never matches. */
                while (alias[k] && upcase(start[k]) == upcase(alias[k]))
                    k++;

                /* Nothing matched, or the alias was only partly matched. */
                if (k == 0 || alias[k])
                    continue;

                /* Whole alias found here: k is its length. */
                if (k > best_len) {
                    best_len = k;
                    best_cp = cp;
                }
            }
        }
    }
    return best_cp;
}

/*
 * Return the name of codepage `index`, or the literal "none" for a
 * negative index.  The returned string is not a copy and must not be
 * freed.
 */
char *get_cp_name(int index)
{
    return (index < 0) ? "none" : codepages[index].name;
}

/*
 * Disabled (#if 0) - not compiled.
 *
 * get_cp_mime_name() returns the first alias of codepage `index`, "none"
 * for a negative index, or NULL when the codepage has no alias array.
 */
#if 0
char *get_cp_mime_name(int index)
{
    if (index < 0) return "none";
    if (!codepages[index].aliases) return NULL;
    return codepages[index].aliases[0];
}
#endif

/*
 * Tell whether codepage `index` is the UTF-8 codepage, i.e. whether its
 * mapping table is table_utf_8.
 *
 * A negative index (the "no codepage" value returned by get_cp_index()
 * and accepted by get_cp_name()) is never special; it yields 0 instead of
 * reading before the start of codepages[].
 *
 * Returns non-zero for the UTF-8 codepage, 0 otherwise.
 */
int is_cp_special(int index)
{
    if (index < 0) return 0;
    return codepages[index].table == table_utf_8;
}

/* Listing generated by Doxygen 1.6.0. */