Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <string>
- #include <iomanip>
- #include<math.h>
- using namespace std;
- class Node {
- //Cuva vjerovatnocu i pozicije od kojih je nastao ovaj cvor(ova vjerovatnoca)
- public:
- float _probability;
- vector<int> _positions;
- };
- void sort(vector<Node> &array) {
- for (int i = 0; i < array.size(); i++) {
- int position = i;
- for (int j = i + 1; j < array.size(); j++) {
- if (array[j]._probability > array[position]._probability) position = j;
- }
- Node temp = array[i];
- array[i] = array[position];
- array[position] = temp;
- }
- }
- class Collection {
- public:
- vector<char> _characters;
- vector<int> _frequencies;
- vector<string> _huffmancode;
- vector<float> _probabilities;
- int _total; // duzina recenice/zbir frekvencija
- float _L;
- float _entropy;
- float _efficiency;
- float _entropyOfExtendedSource;
- Collection(){}
- #pragma region konstruktor koji pravi kolekciju kada je ulaz niz frekvencija
- Collection(int frequencies[],int size) {
- _total = 0;
- for (int i = 0; i < size; i++) {
- _frequencies.push_back(frequencies[i]);
- _total += frequencies[i];
- }
- for (int i = 0; i < size; i++) {
- _probabilities.push_back(frequencies[i]/float(_total));
- }
- }
- #pragma endregion
- #pragma region konstruktor koji pravi kolekciju kada je ulaz recenica
- Collection(string sentence) {
- _total = sentence.size();
- for (int i = 0; i < sentence.length(); i++) {
- bool found = false; //bool varijabla koja nam govori da li slovo vec postoji u nizu
- for (int j = 0; j < _characters.size(); j++) {
- if (sentence[i] == _characters[j]) {
- _frequencies[j]++;
- found = true;
- }
- }
- if (!found) {
- _characters.push_back(sentence[i]);
- _frequencies.push_back(1);
- }
- }
- for (int i = 0; i < _frequencies.size(); i++) {
- _probabilities.push_back(float(_frequencies[i])/float(_total));
- }
- }
- #pragma endregion
- #pragma region konstruktor koji pravi kolekciju kada je ulaz niz vjerovatnoca
- Collection(float probabilities[], int size) {
- for (int i = 0; i < size; i++) {
- _probabilities.push_back(probabilities[i]);
- }
- }
- #pragma endregion
- //friend ostream& operator<<(ostream& COUT, Collection & obj)
- //{
- // for (int i = 0; i < obj._frequencies.size(); i++)
- // {
- // COUT << obj._characters[i] << ": " << obj._frequencies[i] << endl;
- // }
- // return COUT;
- //}
- void sortCharacters() {
- for (int i = 0; i < _frequencies.size(); i++) {
- int position = i;
- for (int j = i + 1; j < _frequencies.size(); j++) {
- if (_frequencies[j] > _frequencies[position]) position = j;
- }
- char temp = _characters[i];
- _characters[i] = _characters[position];
- _characters[position] = temp;
- int tempf = _frequencies[i];
- _frequencies[i] = _frequencies[position];
- _frequencies[position] = tempf;
- float tempp = _probabilities[i];
- _probabilities[i] = _probabilities[position];
- _probabilities[position] = tempp;
- }
- }
- void sortFrequencies() {
- for (int i = 0; i < _frequencies.size(); i++) {
- int position = i;
- for (int j = i + 1; j < _frequencies.size(); j++) {
- if (_frequencies[j] > _frequencies[position]) position = j;
- }
- int tempf = _frequencies[i];
- _frequencies[i] = _frequencies[position];
- _frequencies[position] = tempf;
- float tempp = _probabilities[i];
- _probabilities[i] = _probabilities[position];
- _probabilities[position] = tempp;
- }
- }
- void sortProbabilities() {
- for (int i = 0; i < _probabilities.size(); i++) {
- int position = i;
- for (int j = i + 1; j < _probabilities.size(); j++) {
- if (_probabilities[j] > _probabilities[position]) position = j;
- }
- float tempf = _probabilities[i];
- _probabilities[i] = _probabilities[position];
- _probabilities[position] = tempf;
- }
- }
- void huffman() {
- vector<Node>tempVector; //privremeni niz vjerovatnoca ili frekvencija koji prilikom racunanja huffmanovog koda se smanjiva usljed sabiranja najmanjih vjerovatnoca
- int g = 0;
- for (int i = 0; i < _probabilities.size(); i++) {
- _huffmancode.push_back("");
- Node n; //privremeni cvor
- n._probability = _probabilities[i];
- n._positions.push_back(i);
- tempVector.push_back(n);
- }
- while (tempVector.size() > 1)
- {
- g++;
- cout << g << endl;
- //trazim najmanji i drugi najmanji
- /*int smallest = 0;
- int secondsmallest = 0;
- for (int j = 0; j < tempVector.size(); j++) {
- if (tempVector[j]._probability <= tempVector[smallest]._probability)
- smallest = j;
- }
- for (int j = 0; j < tempVector.size(); j++) {
- if (tempVector[j]._probability <= tempVector[secondsmallest]._probability && j != smallest)
- secondsmallest = j;
- }
- cout << "najmanji:" << tempVector[smallest]._probability << endl;
- cout << "drugi najmanji:" << tempVector[secondsmallest]._probability << endl;*/
- sort(tempVector); //sortiramo vector da bi nasli dva najmanja
- int smallest = tempVector.size() - 1;
- int secondsmallest = tempVector.size() - 2;
- /*for (int i = 0; i < tempVector.size(); i++) {
- cout << tempVector[i]._probability << " ";
- for (int j = 0; j < tempVector[i]._positions.size(); j++) {
- cout << tempVector[i]._positions[j] << ",";
- }
- cout << endl;
- }
- cout << endl;
- cout << endl;
- cout << endl;
- cout << "najmanji:" << tempVector[smallest]._probability << endl;
- cout << "drugi najmanji:" << tempVector[secondsmallest]._probability << endl;*/
- vector<Node>temparray; //privremeni vector za tempVector
- //dodajemo 0 i 1 na pozicije gdje su dva najmanja elementa
- for (int i = 0; i < tempVector[smallest]._positions.size(); i++) {
- string t = _huffmancode[tempVector[smallest]._positions[i]];
- _huffmancode[tempVector[smallest]._positions[i]] = "0";
- _huffmancode[tempVector[smallest]._positions[i]] += t;
- }
- for (int i = 0; i < tempVector[secondsmallest]._positions.size(); i++) {
- string t = _huffmancode[tempVector[secondsmallest]._positions[i]];
- _huffmancode[tempVector[secondsmallest]._positions[i]] = "1";
- _huffmancode[tempVector[secondsmallest]._positions[i]] += t;
- }
- Node tempnode; // privremeni node za sumu dvije najmanje frekvencije
- tempnode._probability = tempVector[smallest]._probability + tempVector[secondsmallest]._probability;
- tempnode._positions = tempVector[smallest]._positions;
- tempnode._positions.insert(tempnode._positions.end(), tempVector[secondsmallest]._positions.begin(), tempVector[secondsmallest]._positions.end());
- temparray.push_back(tempnode);
- //popunjavamo ostale clanove tempVectora u privremeni vector tempArray
- for (int i = 0; i < tempVector.size(); i++) {
- if (i != smallest && i != secondsmallest) {
- temparray.push_back(tempVector[i]);
- }
- }
- tempVector = temparray;
- }
- }
- void AverageCodeLength(){
- _L = 0;
- for (int i = 0; i < _probabilities.size(); i++) {
- _L += _probabilities[i]* _huffmancode[i].length();
- }
- }
- void OutputEverythingForCharacters() {
- cout << "char|freq|huffman code|code length|probability|length*prob|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- for (int i = 0; i < _huffmancode.size(); i++) {
- cout << setw(4) << _characters[i] << "|" << setw(4) << _frequencies[i] << "| " << setw(11) << _huffmancode[i] << "| "
- << setw(10) << _huffmancode[i].length() << "| " << setw(10) << _probabilities[i] << "|" << setw(11) << _probabilities[i] * _huffmancode[i].length() << "|" << endl;
- }
- cout << "----------------------------------------------------------+" << endl;
- cout << " |" << setw(4) << _total << "| | |" << setw(11) << "" << "|" << setw(11) << _L << "|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- }
- void OutputEverythingForFrequencies() {
- cout << "|freq|huffman code|code length|probability|length*prob|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- for (int i = 0; i < _huffmancode.size(); i++) {
- cout << "|" << setw(4) << _frequencies[i] << "| " << setw(11) << _huffmancode[i] << "| "
- << setw(10) << _huffmancode[i].length() << "| " << setw(10) << _probabilities[i] << "|" << setw(11) << _probabilities[i]* _huffmancode[i].length() << "|" << endl;
- }
- cout << "----------------------------------------------------------+" << endl;
- cout << setw(4) << _total << "| | |" << setw(11) << "" << "|" << setw(11) << _L << "|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- }
- void OutputEverythingForProbabilities() {
- cout << "huffman code|code length|probability|length*prob|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- for (int i = 0; i < _huffmancode.size(); i++) {
- cout << setw(11) << _huffmancode[i] << "| "
- << setw(10) << _huffmancode[i].length() << "| " << setw(10) << _probabilities[i] << "|" << setw(11) << _probabilities[i]* _huffmancode[i].length() << "|" << endl;
- }
- cout << "----------------------------------------------------------+" << endl;
- cout << "| | |" << setw(11) << "" << "|" << setw(11) << _L << "|" << endl;
- cout << "----------------------------------------------------------+" << endl;
- }
- void Entropy() {
- float suma = 0;
- for (int i = 0; i < _frequencies.size(); i++) {
- suma += _frequencies[i] * log2(_frequencies[i]);
- }
- _entropy = log2(_total) - (1 / float(_total))*suma;
- }
- void Efficiency() {
- _efficiency = _entropy / _L;
- }
- void EntropyOfExtendedSource() {
- _entropyOfExtendedSource = _entropy*2;
- }
- void ExtendedCollectionMaker(Collection coll) {
- _total = 0;
- for (int i = 0; i < coll._frequencies.size(); i++) {
- for (int j = 0; j < coll._frequencies.size(); j++) {
- _frequencies.push_back(coll._frequencies[i] * coll._frequencies[j]);
- _total += coll._frequencies[i] * coll._frequencies[j];
- }
- }
- for (int i = 0; i < _frequencies.size(); i++) {
- _probabilities.push_back(_frequencies[i] / float(_total));
- }
- /////////
- /*for (int j = 0; j < _frequencies.size(); j++) {
- cout << _frequencies[j] << " " ;
- }
- cout << endl;*/
- /////////
- }
- };
- int main() {
- clock_t startTime = clock();
- /*Collection coll("La noche cae, brumosa ya y morada. Vagas claridades malvas y verdes perduran tras la torre de la iglesia. El camino sube, lleno de sombras, de campanillas, de fragancia de hierba, de canciones, de cansancio y de anhelo.");
- coll.sortCharacters();
- coll.huffman();
- coll.AverageCodeLength();
- coll.OutputEverythingForCharacters();*/
- /*int arr[] = { 27, 16, 4, 56, 22, 2, 78, 45, 36, 13, 12, 7 };*/
- int arr[] = { 83,69,67,82,69,84,79,32,68,69,32,85,78,79,32,83,69,67,82,69,84,79,32,83,69,71,85,82,79 };
- Collection coll(arr,size(arr));
- coll.sortFrequencies();
- coll.huffman();
- coll.AverageCodeLength();
- coll.OutputEverythingForFrequencies();
- /*float arr[] = { 0.3, 0.2, 0.1, 0.1, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05 };
- Collection coll(arr, size(arr));
- coll.sortProbabilities();
- coll.huffman();
- coll.AverageCodeLength();
- coll.OutputEverythingForProbabilities(); */
- coll.Entropy();
- coll.Efficiency();
- coll.EntropyOfExtendedSource();
- cout <<"Entropy: "<< coll._entropy << endl;
- cout <<"Efficiency: " << coll._efficiency << endl;
- cout <<"Entropy F2: " << coll._entropyOfExtendedSource << endl;
- Collection coll2;
- coll2.ExtendedCollectionMaker(coll);
- coll2.huffman();
- coll2.AverageCodeLength();
- coll2.Entropy();
- coll2.Efficiency();
- cout << "Efficiency: " << coll2._efficiency << endl;
- // some code here
- // to compute its execution duration in runtime
- cout << double(clock() - startTime) / (double)CLOCKS_PER_SEC << " seconds." << endl;
- system("pause");
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement