// Untitled

#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include <wctype.h>

// Number of Sentence slots added to a Text each time its pointer array is grown.
#define BASE_COUNT_STRUCTURE 20
// Number of wide characters initially allocated for a sentence buffer, and the
// increment by which that buffer is enlarged.
#define BASE_LENGHT_STR 50
// Expands to character number i of sentence number line in text; the identifiers
// text, line and i must be in scope at the point of use.
#define Current_symbol text->all_str[line]->str[i]
// Number of Word slots added to a Words collection each time it is grown.
#define BASE_WORD_COUNT 30
// Number of wide characters initially allocated for each word buffer.
#define BASE_WORD_LENGHT 20


// A sentence and the number of characters in it.
struct Sentence
{
    wchar_t* str;     // heap buffer holding the sentence text
    int lenght;       // number of characters stored in str (the reader counts the closing '.')
    int latin_count;  // number of Latin letters; filled in before sorting by that count
};

// The text: an array of pointers to sentence structures and the number of sentences.
struct Text
{
    struct Sentence** all_str;  // array of pointers to the individual sentences
    int lenght;                 // number of sentences currently held in all_str
};

// A single word together with a counter.
struct Word
{
    wchar_t* characters;  // heap buffer holding the word
    int count;            // how many times the word has been encountered
};


// A collection of words, each stored with its number of occurrences in the text.
struct Words
{
    struct Word** str;  // array of pointers to the stored words
    int count;          // number of words currently stored in str
};

// Doubles the capacity of a word's character buffer.
//
// word      - word whose `characters` buffer is reallocated.
// count_sym - in/out: current capacity in wide characters; holds the new
//             capacity on return.
//
// A capacity below 1 is raised to 1 so that repeated calls always make
// progress. If the reallocation fails the program reports the error and exits,
// because the function has no way to signal failure to its caller.
void more_memory_word(struct Word *word, int *count_sym)
{
    int new_count = (*count_sym > 0) ? *count_sym * 2 : 1;
    // Keep the old pointer until realloc is known to have succeeded.
    wchar_t *grown = realloc(word->characters, (size_t)new_count * sizeof *grown);

    if (grown == NULL)
    {
        perror("more_memory_word");
        exit(EXIT_FAILURE);
    }
    word->characters = grown;
    *count_sym = new_count;
}

void more_memory_words(struct Words *words, int *count_words)//Выделение памяти под слова. Способно увеличить количество слов и выдает каждому базовую длину.
{
    int i;
    words->str=(struct Word**)realloc(words->str,(*count_words+BASE_WORD_COUNT)*sizeof(struct Word*));
    for (i=*count_words;i<*count_words+BASE_WORD_COUNT;i++)
    {
        words->str[i]=(struct Word*)malloc(sizeof(struct Word));
    }
    for (i=*count_words;i<*count_words+BASE_WORD_COUNT;i++)
    {
        words->str[i]->characters=(wchar_t*)malloc(BASE_WORD_LENGHT*sizeof(wchar_t));
    }
    *count_words+=BASE_WORD_COUNT;
}

void more_memory_struct(struct Text *text, int *count_struct)//Выделение памяти под Текст, способно увеличивать количество предложений, выделяет базовое количество символов в предложении.
{
    int i = 0;
    text->all_str=(struct Sentence**)realloc(text->all_str,(*count_struct+BASE_COUNT_STRUCTURE)*sizeof(struct Sentence*));
    for (i=*count_struct;i<((*count_struct)+BASE_COUNT_STRUCTURE);i++)
    {
        text->all_str[i]=(struct Sentence*)malloc(sizeof(struct Sentence));
    }
    for (i=*count_struct;i<((*count_struct)+BASE_COUNT_STRUCTURE);i++)
    {
        (text->all_str[i])->str=(wchar_t*)malloc(BASE_LENGHT_STR*sizeof(wchar_t));
    }
    *count_struct+=BASE_COUNT_STRUCTURE;
}


// Enlarges a sentence's character buffer by BASE_LENGHT_STR wide characters.
//
// sentence   - sentence whose `str` buffer is reallocated.
// lenght_str - in/out: current capacity in wide characters; increased by
//              BASE_LENGHT_STR on return.
//
// If the reallocation fails the program reports the error and exits.
void more_memory_sentence(struct Sentence *sentence,int* lenght_str)
{
    int new_lenght = *lenght_str + BASE_LENGHT_STR;
    // Keep the old pointer until realloc is known to have succeeded.
    wchar_t *grown = realloc(sentence->str, (size_t)new_lenght * sizeof *grown);

    if (grown == NULL)
    {
        perror("more_memory_sentence");
        exit(EXIT_FAILURE);
    }
    sentence->str = grown;
    *lenght_str = new_lenght;
}


int compare_latin_count (const void ** num1, const void ** num2)
{
  if ( (**(struct Sentence**)num1).latin_count <  (**(struct Sentence**)num2).latin_count ) return -1;
  if ( (**(struct Sentence**)num1).latin_count == (**(struct Sentence**)num2).latin_count ) return 0;
  if ( (**(struct Sentence**)num1).latin_count >  (**(struct Sentence**)num2).latin_count ) return 1;
}

// Reports whether a sentence contains a special symbol but no upper-case letter.
//
// sentence - sentence to inspect; the first `lenght` characters of `str` are
//            examined.
//
// Returns 1 when at least one character is not alphanumeric and no character
// is an upper-case letter, otherwise 0. Classification uses iswupper and
// iswalnum, so it follows the current LC_CTYPE locale and covers both Latin
// and Cyrillic capitals, including the boundary letters.
//
// NOTE(review): spaces and the closing '.' are not alphanumeric, so they count
// as special symbols here, exactly as in the previous iswalnum test. Confirm
// whether separators should be excluded.
int found_upper_reg_and_special_sym(struct Sentence *sentence)
{
    int i;
    int has_special_symbol = 0;

    for (i = 0; i < sentence->lenght; i++)
    {
        wint_t symbol = (wint_t)sentence->str[i];

        if (iswupper(symbol))
            return 0;  // one capital letter is enough to reject the sentence
        if (!iswalnum(symbol))
            has_special_symbol = 1;
    }
    return has_special_symbol;
}

// Case-insensitive equality test for two wide strings; works for whole
// sentences as well as single words.
//
// Returns 1 when both strings have the same length and every pair of
// characters matches after towlower, otherwise 0.
int compare(const wchar_t* str1,const wchar_t* str2)
{
    const wchar_t *left = str1;
    const wchar_t *right = str2;

    // Walk both strings in step until one of them ends.
    while (*left != L'\0' && *right != L'\0')
    {
        if (towlower(*left) != towlower(*right))
            return 0;
        left++;
        right++;
    }
    // Equal only if both ended together, i.e. the lengths match.
    if (*left != L'\0' || *right != L'\0')
        return 0;
    return 1;
}

// Removes sentence number `num` from the text and releases its memory.
//
// text - text to modify; `text->lenght` is decreased by one.
// num  - zero-based index of the sentence to remove; an index outside
//        [0, text->lenght) is ignored.
//
// Both the sentence's character buffer and the Sentence itself are freed. The
// following sentences are shifted down by one position and the vacated last
// used slot is set to NULL so that no pointer is stored twice.
void delete_repeating_sentence(struct Text *text, int num)
{
    int i;

    if (num < 0 || num >= text->lenght)
        return;

    free(text->all_str[num]->str);
    free(text->all_str[num]);
    for (i = num; i < text->lenght - 1; i++)
    {
        text->all_str[i] = text->all_str[i + 1];
    }
    // Index lenght-1 is the slot vacated by the shift; index lenght lies past
    // the sentences in use and must be left alone.
    text->all_str[text->lenght - 1] = NULL;
    text->lenght -= 1;
}

// Removes every sentence that repeats an earlier one (comparison is done with
// compare, i.e. without regard to letter case). The first occurrence is kept.
void find_repeating_sentence(struct Text *text)
{
    int kept;

    for (kept = 0; kept < text->lenght - 1; kept++)
    {
        int probe = kept + 1;

        while (probe < text->lenght)
        {
            if (compare(text->all_str[kept]->str, text->all_str[probe]->str))
            {
                // The next sentence slides into this index, so stay on it.
                delete_repeating_sentence(text, probe);
            }
            else
            {
                probe++;
            }
        }
    }
}


int input_text(struct Text *text, int* count_struct)//Считывание текста, считывает посимвольно, пока не встретит знак переноса строки, начинает записывать в новое предложение, когда встречает точку. ЗАЩИТЫ ОТ КРИВОРУКОСТИ ПОЛЬЗОВАТЕЛЯ НЕТ ПОКА ЧТО.
{

    int i = 0;
    int line = 0;
    int current_lenght_str = 0;
    int flag_first_symbol = 0;
    int lenght_str=BASE_LENGHT_STR;
    wchar_t symbol;
    symbol=getwchar();
    while (symbol!='\n')
    {
        if (symbol!=' ')
            flag_first_symbol=1;
        if (flag_first_symbol)
        {
            Current_symbol=symbol;
            if (i==(lenght_str-1))
                more_memory_sentence(text->all_str[line],&lenght_str);
            i+=1;
            if (symbol=='.')
            {
                Current_symbol='\0';
                text->all_str[line]->lenght=i;
                line+=1;
                i=0;
                lenght_str=BASE_LENGHT_STR;
                flag_first_symbol = 0;
            }
        }
        symbol=getwchar();
        if (line == *count_struct)
            more_memory_struct(text, count_struct);

    }
    text->lenght=line;
}


void print_words_with_count(struct Text *text)//проблема с этой функцией
//она должна создавать временный массив длиной равной текущему предложению, записывать туда его. Делать wcstok
//а дальше в переменной current_word хранить текущее слово, сравнивая его с уже записанными ранее. в случа если слово уже было
// должно добавить 1 к его количеству, если не было, записать его с количеством 1.
// сейчас я сделал дебажный вывод, который показывает: сначала строку которая подается на вход и ее длину, потом
// либо показывает какое новое слово было записано, либо если были сравнение показывает пары сравниваемых слов
// если  было найдено соответствие, то пишет слово и знак +.
// на некоторых тестах происходит дичь.
// На тесте q Q a a. q a. t. t. откуда-то берет букву а после t.
// в тесте q q q q w. e w w. r. вообще появляются какие-то неизведанные символы.
//Во втором тесте при первом копировании строки в временный массив первое предложение копирует как q q q q w.\x1db61
// хз что это за дичь
{
    int line,i,num;
    wchar_t *current_word;
    int space_for_word = BASE_WORD_LENGHT;
    int flag_word_detected = 0;
    struct Words words = {NULL, 0};
    int count_words= 0;
    wchar_t *check;
    wchar_t *state;
    wchar_t *temp_sentence = NULL;
    words.count=0;
    more_memory_words(&words, &count_words);
    for (line=0;line<(text->lenght);line++)
    {
        //wprintf(L"%ls %d\n",text->all_str[line]->str, (text->all_str[line]->lenght));
        temp_sentence = NULL;
        temp_sentence = (wchar_t*)realloc(temp_sentence,text->all_str[line]->lenght * sizeof(wchar_t));
        wcsncpy(temp_sentence,text->all_str[line]->str,((text->all_str[line]->lenght)+1));// здесь
       // wprintf(L"%ls\n",temp_sentence);
        current_word = wcstok(temp_sentence,L" .,",&state);
        while (current_word!= NULL)
        {
            flag_word_detected = 0;
            for (num = 0; num < words.count; num++)
            {
                //wprintf(L"|%ls| %ls|   ",current_word,words.str[num]->characters);
                if (compare(current_word, words.str[num]->characters)==1)
                {
                    words.str[num]->count += 1;
                    flag_word_detected = 1;
                    //wprintf(L"%ls +\n",current_word);
                }
            }
            if (flag_word_detected==0)
            {
                while (space_for_word < wcslen(current_word))
                    more_memory_word(words.str[words.count], &space_for_word);
                //wprintf(L"%ls  1",current_word);
                 wcsncpy(words.str[words.count]->characters,current_word,(wcslen(current_word)+1));

                //wprintf(L"%ls  2\n",current_word);
                words.str[words.count]->count = 1;
                space_for_word = BASE_WORD_LENGHT;
                words.count+=1;
                //wprintf(L"новое слово %ls",current_word);
                //wprintf(L"\n");
                if (words.count==count_words)
                {
                  more_memory_words(&words, &count_words);
                }
            }

            current_word = wcstok(NULL,L" .,",&state);

        }

        free (temp_sentence);
    }
    //wprintf(L"\n");
    for (i=0; i<words.count;i++)
    {
        wprintf(L"%ls %d\n", words.str[i]->characters, words.str[i]->count);
    }

}

// Counts the characters of a sentence whose lower-case form lies in 'a'..'z'.
//
// sentence - sentence to scan; the first `lenght` characters of `str` are
//            examined.
//
// Returns the number of such characters.
int count_latin_symbol(struct Sentence *sentence)
{
    int latin_total = 0;
    int pos = 0;

    while (pos < sentence->lenght)
    {
        wint_t lowered = towlower(sentence->str[pos]);

        if (lowered >= 'a' && lowered <= 'z')
        {
            latin_total++;
        }
        pos++;
    }
    return latin_total;
}

void sort_by_lalin_count(struct Text *text)
{
    int line;
    for (line=0;line<(text->lenght);line++)
    {
        text->all_str[line]->latin_count=count_latin_symbol(text->all_str[line]);
    }
    qsort(text->all_str,text->lenght,sizeof(struct Sentence*),compare_latin_count);
}

// Removes every sentence for which found_upper_reg_and_special_sym returns 1.
//
// text - text to filter; sentences are removed with delete_repeating_sentence,
//        which also decreases `text->lenght`.
void delete_specsym_and_noupsym_sentence(struct Text *text)
{
    int i = 0;

    while (i < text->lenght)
    {
        if (found_upper_reg_and_special_sym(text->all_str[i]) == 1)
        {
            // The following sentence slides into index i, so it has to be
            // examined before moving on.
            delete_repeating_sentence(text, i);
        }
        else
        {
            i++;
        }
    }
}


// Program entry point: reads a line of text, removes repeated sentences,
// prints the remaining sentences with their lengths, prints the word
// statistics and then prints the sentences once more.
int main()
{
    struct Text text = {NULL, 0};
    int count_struct = 0;
    int idx;

    // Take the character classification rules from the environment.
    setlocale(LC_CTYPE, "");

    more_memory_struct(&text, &count_struct );
    input_text(&text, &count_struct);
    find_repeating_sentence(&text);

    idx = 0;
    while (idx < text.lenght)
    {
        wprintf(L"%ls %d", text.all_str[idx]->str, text.all_str[idx]->lenght);
        idx++;
    }
    wprintf(L"\n");

    print_words_with_count(&text);

    // Sorting with sort_by_lalin_count is currently switched off.
    idx = 0;
    while (idx < text.lenght)
    {
        wprintf(L"%ls %d", text.all_str[idx]->str, text.all_str[idx]->lenght);
        idx++;
    }
    wprintf(L"\n");

    return 0;
}