Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>

#include <algorithm>
#include <cctype>
#include <clocale>
#include <cmath>
#include <iostream>
#include <string>

using namespace std;
// Computes the Shannon entropy, in bits, of a histogram of counts.
//
// A - array of occurrence counts
// b - number of entries of A to process
// s - text whose length is used as the total when turning each count into a
//     probability (p = A[i] / s.size())
//
// Returns -sum(p * log2(p)) over the non-zero entries.
double H1(int *A, int b, const std::string& s) {
    const double length = double(s.size());
    double entropy = 0;
    for (int idx = 0; idx < b; ++idx) {
        if (A[idx] == 0) {
            continue;  // an empty bucket contributes nothing to the sum
        }
        const double p = double(A[idx]) / length;
        entropy -= p * std::log2(p);
    }
    return entropy;
}
// Returns a copy of `s` that keeps only alphabetic characters, each converted
// to lower case. Which bytes count as alphabetic depends on the current
// LC_CTYPE locale, because classification is done by std::isalpha.
std::string remove_spaces(const std::string &s) {
    std::string res;
    res.reserve(s.size());
    for (const char ch : s) {
        // The <cctype> functions are only defined for EOF and for values that
        // fit in unsigned char; a plain char holding a high-bit byte (such as
        // a single-byte Cyrillic letter) may be negative, so convert first.
        const unsigned char uc = static_cast<unsigned char>(ch);
        if (std::isalpha(uc)) {
            res += static_cast<char>(std::tolower(uc));
        }
    }
    return res;
}
// Packs a pair of characters into one table index:
// (a - 'а') + 32 * (b - 'а'), i.e. `a` selects the position within a group of
// 32 and `b` selects the group.
// NOTE(review): this presumably expects a single-byte encoding in which the
// letters а..я are 32 consecutive codes (e.g. Windows-1251) - confirm.
int encode(char a, char b) {
    const int low = a - 'а';
    const int high = b - 'а';
    return high * 32 + low;
}
- int getbigram(const string &s, int p) {
- try {
- return encode(s[p], s[p + 1]);
- } catch (...) {
- return 0;
- }
- }
- int* getfreq(const string &_str, int step) {
- string str = remove_spaces(_str);
- int *A = new int[32 * 32];
- for (int i = 0; i < 32 * 32; ++i) {
- A[i] = 0;
- }
- for (int i = 0; i < str.size() - 1; i += step) {
- ++A[getbigram(str, i)];
- }
- return A;
- }
// Writes one numeric table cell to standard output: the value with three
// digits after the decimal point, followed by the column separator '|'.
void printcell(double n) {
    fprintf(stdout, "%1.3f|", n);
}
// Writes one character table cell to std::cout, followed by the column
// separator '|'.
//
// The cell is padded to six columns ("  c  |") so that it lines up with the
// numeric cells produced by the "%1.3f|" format (six columns for values in
// [0, 1]) and with the 199-column rule printed by printstick(), which equals
// one leading '|' plus 33 six-column cells.
void printcell(char c) {
    std::cout << "  " << c << "  |";
}
// Prints a horizontal rule to std::cout: `n` dashes followed by a newline.
// The default width is 199 columns.
void printstick(int n = 199) {
    const std::string rule(n, '-');
    std::cout << rule << "\n";
}
- void printrow(int n, const int *A, int s) {
- cout << "|";
- printcell(char('а' + n));
- for (int i = 0; i < 32; ++i) {
- printcell(A[n + i * 32] / double(s));
- }
- cout << "\n";
- printstick();
- }
- void printhead() {
- printstick();
- cout << "| |";
- for (int i = 0; i < 32; ++i) {
- printcell(char('а' + i));
- }
- cout << "\n";
- printstick();
- }
// Adds up the first `n` elements of the array `A` and returns the total.
// A non-positive `n` gives 0.
int sum(int *A, int n) {
    int total = 0;
    const int *cursor = A;
    for (int remaining = n; remaining > 0; --remaining) {
        total += *cursor++;
    }
    return total;
}
- void print_table(const string &str, int step) {
- int *A = getfreq(str, step),
- s = sum(A, 1024);
- printhead();
- for (int i = 0; i < 32; ++i) {
- printrow(i, A, s);
- }
- cout << "\n";
- }
- int main() {
- setlocale(LC_CTYPE, "rus");
- string s = "апоаыеждрш иьвэадкщлиот эдлкыотрэкдвщлорнлаиотывждларот";
- print_table(s, 1);
- print_table(s, 2);
- int delta = 'я' - 'а' + 2;
- int *A = new int[delta],
- *B = new int[delta];
- for (int i = 0; i < delta; ++i) {
- A[i] = 0;
- B[i] = i;
- }
- for (int i = 0; i < s.size(); ++i) {
- ++A[s[i] == ' ' ? delta - 1 // индекс последнего элемента массива
- : s[i] - 'а'];
- }
- for (int i = 0; i < delta; ++i) {
- for (int j = i + 1; j < delta; ++j) {
- if (A[i] < A[j]) {
- swap(A[i], A[j]);
- swap(B[i], B[j]);
- }
- }
- }
- int total = sum(A, delta);
- printstick();
- cout << "|";
- for (int i = 0; i < delta; ++i) {
- printcell(B[i] == delta - 1 ? ' ' : char('а' + B[i]));
- }
- cout << "\n";
- printstick();
- cout << "|";
- for (int i = 0; i < delta; ++i) {
- printcell(A[i] / double(total));
- }
- cout << "\n";
- printstick();
- cout << "\n" << H1(A, delta,s) << "\n";
- delete[] A;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement