/* Untitled: command-line converter between UTF-8, UTF-16 and UTF-32 text encodings. */

#include <limits.h>
#include <malloc.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// A buffer of encoded text bytes together with its length.
// Used both for the raw bytes read from the input file and for the
// re-encoded bytes written to the output file; main() releases `data`
// with free().
struct data_encoding {
    unsigned char *data;  // the encoded bytes
    int size;             // number of bytes stored in `data`
};

// The result of decoding an encoded byte stream: one decoded value per
// array element. main() releases `data` with free().
struct data_utf_to_unicode {
    unsigned int *data;  // decoded code points, one per element
    int size;            // number of elements stored in `data`
};

// Stores `value` at index *position of the heap array `a` and advances
// *position by one.
//
//   a        - heap array with room for *size elements
//   size     - in/out: capacity of `a` in elements; updated when the array grows
//   position - in/out: index of the next free slot; incremented on success
//   value    - element to store
//
// Returns true on success. Returns false, leaving *size, *position and the
// caller's buffer untouched, when the indices are inconsistent or when the
// array cannot be grown. The caller keeps ownership of the buffer in every
// case, so it is never freed here.
//
// NOTE(review): `a` is passed by value, so when realloc() relocates the block
// the caller cannot learn the new address and is left with a stale pointer.
// Callers should size the array up front so that *position + 1 < *size and
// the growth path below is never taken; a real fix needs a signature that
// takes `unsigned int **`.
bool add(unsigned int *a, int *size, int *position, unsigned int value) {
    // Reject indices that would make the write below land outside the array.
    if (*size < 0 || *position < 0 || *position > *size) {
        return false;
    }
    if (*position + 1 >= *size) {
        // Guard the capacity arithmetic against signed overflow.
        if (*size > (INT_MAX - 1) / 2) {
            return false;
        }
        int new_size = *size * 2 + 1;
        // The byte count is elements * element size (the original requested
        // sizeof(int) * size * 2 + 1 bytes, i.e. one byte for the first element).
        unsigned int *tmp = realloc(a, sizeof *a * (size_t) new_size);
        if (!tmp) {
            perror("realloc() ");
            return false;
        }
        a = tmp;
        *size = new_size;
    }
    a[(*position)++] = value;
    return true;
}

// Reads the whole file `file_name` into a freshly allocated buffer.
//
//   file_name - path of the file, opened in binary mode
//   position  - out: set to the number of bytes read on success; any value it
//               holds on entry is ignored, and it is left untouched on failure
//
// Returns the buffer, which the caller must free(). An empty file yields a
// valid non-NULL buffer with *position == 0. Returns NULL after printing a
// diagnostic when the file cannot be opened, memory runs out, a read error
// occurs, or the file does not fit in an int-sized length.
unsigned char *read_bytes(const char *file_name, int *position) {
    enum { INITIAL_CAPACITY = 4096 };

    FILE *fin = fopen(file_name, "rb");
    if (fin == NULL) {
        perror("fopen() ");
        return NULL;
    }

    int capacity = INITIAL_CAPACITY;
    int length = 0;
    unsigned char *buf = malloc((size_t) capacity);
    if (buf == NULL) {
        perror("malloc() ");
        fclose(fin);
        return NULL;
    }

    for (;;) {
        // Grow before reading so there is always free space to read into.
        if (length == capacity) {
            if (capacity == INT_MAX) {
                fprintf(stderr, "read_bytes(): %s is too large\n", file_name);
                goto fail;
            }
            int new_capacity = capacity > INT_MAX / 2 ? INT_MAX : capacity * 2;
            unsigned char *tmp = realloc(buf, (size_t) new_capacity);
            if (tmp == NULL) {
                perror("realloc() ");
                goto fail;
            }
            buf = tmp;
            capacity = new_capacity;
        }
        size_t wanted = (size_t) (capacity - length);
        size_t got = fread(buf + length, 1, wanted, fin);
        length += (int) got;
        // A short read means end of file or an error; ferror() tells which.
        if (got < wanted) {
            break;
        }
    }
    if (ferror(fin)) {
        perror("fread() ");
        goto fail;
    }

    fclose(fin);
    *position = length;
    return buf;

fail:
    fclose(fin);
    free(buf);
    return NULL;
}


// Reports whether the first `encode_arr_size` bytes of `data` match
// `encode_arr` byte for byte. Comparison stops at the first difference;
// a non-positive length compares as equal.
bool equal_arr(const unsigned char *data, const unsigned char *encode_arr, int encode_arr_size) {
    int idx = 0;
    while (idx < encode_arr_size && data[idx] == encode_arr[idx]) {
        ++idx;
    }
    // Reaching the end without a mismatch means the prefixes are identical.
    return idx >= encode_arr_size;
}


struct data_utf_to_unicode utf8_to_unicode(const unsigned char *utf_encoding, int size, bool bom) {
    unsigned int *unicode_encoding = malloc(0);
    int i = 3 * bom;
    int data_size = 0;
    int j = 0;
    while (i < size) {
        unsigned int res = 0;
        unsigned int c = utf_encoding[i++];
        if (!(c >> 7u)) {
            res = c;
        } else {
            if ((c & 224u) == 192) {
                unsigned int b2 = (c & 31u) * (1u << 6u);
                unsigned int b1 = utf_encoding[i++] & 63u;
                res = b1 + b2;
            } else if ((c & 240u) == 224) {
                unsigned int b3 = (c & 15u) * (1u << 12u);
                unsigned int b2 = (utf_encoding[i++] & 63u) * (1u << 6u);
                unsigned int b1 = utf_encoding[i++] & 63u;
                res = b1 + b2 + b3;
            } else if ((c & 248u) == 240) {
                unsigned int b4 = (c & 7u) * (1u << 18u);
                unsigned int b3 = (utf_encoding[i++] & 63u) * (1u << 12u);
                unsigned int b2 = (utf_encoding[i++] & 63u) * (1u << 6u);
                unsigned int b1 = (utf_encoding[i++] & 63u);
                res = b1 + b2 + b3 + b4;
            }
        }
        add(unicode_encoding, &data_size, &j, res);
    }
    struct data_utf_to_unicode tmp = {unicode_encoding, j};
    return tmp;
}

struct data_encoding unicode_to_utf8_bom(const unsigned int *data, int size) {
    unsigned char *output_encoding = malloc(sizeof(unsigned char) * 4 * (size + 1));
    int position = 0;
    output_encoding[position++] = 0xEF;
    output_encoding[position++] = 0xBB;
    output_encoding[position++] = 0xBF;
    int i = 0;
    while (i < size) {
        unsigned char res = 0;
        if (data[i] < 1u << 7u) {
            res = data[i];
            output_encoding[position++] = res;
        } else if (data[i] < 1u << 11u) {
            unsigned char b1 = (data[i] & 63u) + 128;
            unsigned char b2 = ((data[i] >> 6u) & 31u) + 192;
            output_encoding[position++] = b2;
            output_encoding[position++] = b1;
            //printf("%X %X ", b2, b1);
        } else if (data[i] < 1u << 16u) {
            unsigned char b1 = (data[i] & 63u) + 128;
            unsigned char b2 = ((data[i] >> 6u) & 63u) + 128;
            unsigned char b3 = ((data[i] >> 12u) & 15u) + 224;
            output_encoding[position++] = b3;
            output_encoding[position++] = b2;
            output_encoding[position++] = b1;
        } else if (data[i] < 1u << 21u) {
            unsigned char b1 = (data[i] & 63u) + 128;
            unsigned char b2 = ((data[i] >> 6u) & 63u) + 128;
            unsigned char b3 = ((data[i] >> 12u) & 63u) + 128;
            unsigned char b4 = ((data[i] >> 18u) & 7u) + 240;
            output_encoding[position++] = b4;
            output_encoding[position++] = b3;
            output_encoding[position++] = b2;
            output_encoding[position++] = b1;
        }
        i++;
    }
    struct data_encoding tmp = {output_encoding, position};
    return tmp;
}

struct data_utf_to_unicode utf16_to_unicode(const unsigned char *utf_encoding, int size, bool is_be) {
    unsigned int* unicode_encoding = malloc(0);
    int i = 2;
    int j = 0;
    int data_size = 0;
    while (i < size) {
        unsigned int c = utf_encoding[i++];
        unsigned int p = utf_encoding[i++];
        unsigned int tmp = 0;
        if (is_be) {
            tmp = (c << 8u) + p;
        } else {
            tmp = (p << 8u) + c;
        }
        unsigned int res = tmp;
        if (tmp >= 55296) {
            c = utf_encoding[i++];
            p = utf_encoding[i++];
            unsigned int tmp2 = 0;
            if (is_be) {
                tmp2 = (c << 8u) + p;
            } else {
                tmp2 = (p << 8u) + c;
            }
            unsigned int b1 = ((tmp - 55296) << 10u) + 65536;
            unsigned int b2 = tmp2 & ((1u << 10u) - 1u);
            res = b1 + b2;
        }
        add(unicode_encoding, &data_size, &j,  res);
    }
    struct data_utf_to_unicode tmp = {unicode_encoding, j};
    return tmp;
}

// Appends one 16-bit unit to `output_encoding` as two bytes starting at index
// *position, then advances *position by two. With `is_be` the high byte comes
// first, otherwise the low byte does. Only the low 16 bits of `res` are
// representable in the two bytes written.
void put_byte(unsigned char *output_encoding, int *position, unsigned int res, bool is_be) {
    const unsigned char high = (unsigned char) (res >> 8u);
    const unsigned char low = (unsigned char) (res & 0xFFu);
    unsigned char *slot = output_encoding + *position;

    slot[0] = is_be ? high : low;
    slot[1] = is_be ? low : high;
    *position += 2;
}

struct data_encoding unicode_to_utf16(const unsigned int *data, int size, bool is_be) {
    unsigned char *output_encoding = malloc(sizeof(unsigned char) * 4 * (size + 1));
    int i = 0;
    int position = 0;
    put_byte(output_encoding, &position, 0xFEFF, is_be);
    while (i < size) {
        unsigned int res = 0;
        if ((data[i] < (1u << 16u) && (data[i] < 55296 || data[i] > 57343))) {
            res = data[i];
            put_byte(output_encoding, &position, res, is_be);

        } else {
            unsigned int b1 = (data[i] >> 10u) - 64 + 55296;
            unsigned int b2 = (data[i] & ((1u << 10u) - 1)) + 56320;
            put_byte(output_encoding, &position, b1, is_be);
            put_byte(output_encoding, &position, b2, is_be);

        }
        i++;
    }
    struct data_encoding tmp = {output_encoding, position};
    return tmp;
}

struct data_utf_to_unicode utf32_to_unicode(const unsigned char *utf_encoding, int size, bool is_be) {
    unsigned int *unicode_encoding = malloc(0);
    int i = 4;
    int j = 0;
    int data_size = 0;
    while (i < size) {
        unsigned int a = utf_encoding[i++];
        unsigned int b = utf_encoding[i++];
        unsigned int c = utf_encoding[i++];
        unsigned int d = utf_encoding[i++];
        if (is_be) {
            add(unicode_encoding, &data_size, &j, ((a << 24u) + (b << 16u) + (c << 8u) + d));
        } else {
            add(unicode_encoding, &data_size, &j, (d << 24u) + (c << 16u) + (b << 8u) + a);
        }
    }
    struct data_utf_to_unicode tmp = {unicode_encoding, j};
    return tmp;
}

struct data_encoding unicode_to_utf32(const unsigned int *data, int size, bool is_be) {
    unsigned char *output_encoding = malloc(sizeof(unsigned char) * 4 * (size + 1));
    int i = 0;
    int position = 0;
    if (is_be) {
        output_encoding[position++] = 0x00;
        output_encoding[position++] = 0x00;
        output_encoding[position++] = 0xFE;
        output_encoding[position++] = 0xFF;
    } else {
        output_encoding[position++] = 0xFF;
        output_encoding[position++] = 0xFE;
        output_encoding[position++] = 0x00;
        output_encoding[position++] = 0x00;
    }
    while (i < size) {
        unsigned int x = data[i++];
        unsigned int a = (x >> 24u) & 255u;
        unsigned int b = (x >> 16u) & 255u;
        unsigned int c = (x >> 8u) & 255u;
        unsigned int d = x & 255u;
        if (is_be) {
            output_encoding[position++] = a;
            output_encoding[position++] = b;
            output_encoding[position++] = c;
            output_encoding[position++] = d;
        } else {
            output_encoding[position++] = d;
            output_encoding[position++] = c;
            output_encoding[position++] = b;
            output_encoding[position++] = a;
        }
    }
    struct data_encoding tmp = {output_encoding, position};
    return tmp;
}

struct data_encoding read_encoding(const char *fin) {
    int position = 0;
    unsigned char *data = read_bytes(fin, &position);
    struct data_encoding tmp = {data, position};
    return tmp;
}


struct data_utf_to_unicode utf_to_unicode(unsigned char *data, int size) {
    const unsigned char utf8_bom[3] = {0xEF, 0xBB, 0xBF};
    const unsigned char utf16_bom_be[2] = {0xFE, 0xFF};
    const unsigned char utf16_bom_le[2] = {0xFF, 0xFE};
    const unsigned char utf32_bom_be[4] = {0x00, 0x00, 0xFE, 0xFF};
    const unsigned char utf32_bom_le[4] = {0xFF, 0xFE, 0x00, 0x00};
    if (equal_arr(data, utf32_bom_be, 4))return utf32_to_unicode(data, size, true); // "utf32_bom_be";
    if (equal_arr(data, utf32_bom_le, 4))return utf32_to_unicode(data, size, false); //"utf32_bom_le";
    if (equal_arr(data, utf16_bom_be, 2))return utf16_to_unicode(data, size, true); // "utf16_bom_be";
    if (equal_arr(data, utf16_bom_le, 2)) return utf16_to_unicode(data, size, false); // "utf16_bom_le"
    if (equal_arr(data, utf8_bom, 3))return utf8_to_unicode(data, size, true); // "utf8_bom";
    return utf8_to_unicode(data, size, false); // "utf8";
}

struct data_encoding unicode_to_utf(const char *output_encoding, unsigned int *unicode_encoding, int size) {
    if (strcmp(output_encoding, "utf8") == 0) {
        return unicode_to_utf8_bom(unicode_encoding, size);
    }
    if (strcmp(output_encoding, "utf16_be") == 0) {
        return unicode_to_utf16(unicode_encoding, size, true);
    }
    if (strcmp(output_encoding, "utf16_le") == 0) {
        return unicode_to_utf16(unicode_encoding, size, false);
    }
    if (strcmp(output_encoding, "utf32_be") == 0) {
        return unicode_to_utf32(unicode_encoding, size, true);
    }
    if (strcmp(output_encoding, "utf32_le") == 0) {
        return unicode_to_utf32(unicode_encoding, size, false);
    }
}

// Writes `size` bytes from `output_data` to the file `file_name`, creating or
// truncating it in binary mode.
//
// Returns true only when every byte was written and the file closed cleanly.
// Returns false after printing a diagnostic when the arguments are invalid
// (negative size, or a NULL buffer with a positive size; the file is not
// touched in that case), when the file cannot be opened, or when writing or
// closing fails.
bool write_output_encoding(const char *file_name, unsigned char *output_data, int size) {
    if (size < 0 || (output_data == NULL && size > 0)) {
        fprintf(stderr, "write_output_encoding(): invalid buffer\n");
        return false;
    }

    FILE *fout = fopen(file_name, "wb");
    if (fout == NULL) {
        perror("fopen() ");
        return false;
    }

    bool ok = true;
    size_t count = (size_t) size;
    if (count > 0 && fwrite(output_data, sizeof *output_data, count, fout) != count) {
        perror("fwrite() ");
        ok = false;
    }
    // fclose() flushes buffered data, so a failure here also means data loss.
    if (fclose(fout) != 0) {
        perror("fclose() ");
        ok = false;
    }
    return ok;
}

// Entry point: converts a text file from one Unicode encoding to another.
//
// Usage: <program> <input file> <output file> <output encoding>
//
// The input encoding is detected by utf_to_unicode(); the output encoding
// name is interpreted by unicode_to_utf(). Returns EXIT_SUCCESS when the
// output file was written, EXIT_FAILURE when arguments are missing or any
// stage reports failure through a NULL buffer or a false return.
int main(int argc, char **argv) {
    if (argc < 4) {
        fprintf(stderr, "usage: %s <input file> <output file> <output encoding>\n",
                argc > 0 && argv[0] != NULL ? argv[0] : "program");
        return EXIT_FAILURE;
    }
    const char *fin = argv[1];
    const char *fout = argv[2];
    const char *output_encoding = argv[3];

    int status = EXIT_FAILURE;
    struct data_utf_to_unicode second_data = {NULL, 0};
    struct data_encoding third_data = {NULL, 0};

    // read the encoded input
    struct data_encoding first_data = read_encoding(fin);
    if (first_data.data == NULL) {
        fprintf(stderr, "failed to read %s\n", fin);
        goto cleanup;
    }

    // decode to unicode code points
    second_data = utf_to_unicode(first_data.data, first_data.size);
    if (second_data.data == NULL) {
        fprintf(stderr, "failed to decode %s\n", fin);
        goto cleanup;
    }

    // encode to the requested utf encoding
    third_data = unicode_to_utf(output_encoding, second_data.data, second_data.size);
    if (third_data.data == NULL) {
        fprintf(stderr, "failed to encode as %s\n", output_encoding);
        goto cleanup;
    }

    // write the encoded output
    if (write_output_encoding(fout, third_data.data, third_data.size)) {
        status = EXIT_SUCCESS;
    }

cleanup:
    // free(NULL) is a no-op, so stages that never ran need no special casing.
    free(first_data.data);
    free(second_data.data);
    free(third_data.data);
    return status;
}