Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.09 KB | None | 0 0
  1. #pragma once
  2. #include "Dataset.h"
  3. #include "Tree.h"
  4.  
  5.  
  6. // numara de cate ori apare clasa in dataset
  7. int classcount(Dataset ds, std::string classname)
  8. {
  9. int classindex = ds.nrAttributes - 1;
  10. int count = 0;
  11. for (int i = 0; i < ds.nrRecords; ++i)
  12. {
  13. if (ds.getDataValue(i, classindex) == classname)
  14. count++;
  15. }
  16. return count;
  17. }
  18.  
  19.  
  20. TreeNode *RandomTree(Dataset ds, std::string attribValue)
  21. {
  22. TreeNode *node = new TreeNode; // se genereaza un nou nod
  23. node->branchName = attribValue; // ramura care se incheie cu acest nod va avea ca eticheta valoarea atributului nodului parinte
  24.  
  25. /*daca toate record-urile din ds au aceeasi clasa,
  26. node->name = numele clasei respective
  27. return node;
  28. */
  29. std::string _class = ds.dataValues[ds.dataValues.size() - 1];
  30. bool sameClass = true;
  31. for (int i = 1; i < ds.nrAttributes; ++i)
  32. {
  33. if (ds.dataValues[i] != _class)
  34. {
  35. sameClass = false;
  36. break;
  37. }
  38. }
  39. if (sameClass)
  40. {
  41. node->name = _class;
  42. return node;
  43. }
  44.  
  45.  
  46.  
  47. int attribIdx = rand() % (ds.attributes.size()-1); //alegem un atribut aleator
  48. node->name = ds.attributes[attribIdx].name; //nodul nou creat ia valoarea acestui atribut
  49.  
  50. //cream cate o ramura pentru fiecare valoare a atributului
  51. for (int i = 0; i < ds.attributes[attribIdx].values.size(); i++)
  52. {
  53. std::string attribValue = ds.attributes[attribIdx].values[i]; //valoarea atributului este eticheta noii ramuri
  54. Dataset subset; // submultimea ce contine elementele din ds cu valoarea attribValue a atributului ales anterior
  55. subset.name = ds.name;
  56. subset.nrAttributes = ds.nrAttributes;
  57. subset.nrRecords = 0;
  58.  
  59. for (int i = 0; i < ds.nrAttributes; ++i) // copiem intai atributele
  60. {
  61. subset.attributes.push_back(ds.attributes[i]);
  62. }
  63. for (int i = 0; i < ds.nrRecords; ++i) // parcurgem multimea de inregistrari
  64. {
  65. std::string currVal = ds.getDataValue(i, attribIdx);
  66. if (currVal == attribValue) // daca am gasit un element cu attribValue
  67. {
  68. for (int j = 0; j < ds.nrAttributes; ++j) // copiem inregistrarea respectiva
  69. subset.dataValues[subset.nrRecords * subset.nrAttributes + j] = currVal;
  70. ++subset.nrRecords;
  71. }
  72. }
  73.  
  74.  
  75. if (subset.dataValues.empty()) //daca nu mai sunt instante in subset
  76. {
  77. //facem maximul pe numarul de aparitii ale claselor
  78. int classIndex = ds.nrAttributes - 1;
  79. int numberOfClasses = ds.attributes[classIndex].values.size();
  80.  
  81. int maxClassCount = classCount(ds, ds.attributes[classIndex].values[0]);
  82. std::string maxClass = ds.attributes[classIndex].values[0];
  83. int currentClassCount;
  84. std::string currentClass;
  85.  
  86. for (int i = 1; i < numberOfClasses; ++i)
  87. {
  88. currentClassCount = classCount(ds, ds.attributes[classIndex].values[i]);
  89. currentClass = ds.attributes[classIndex].values[i];
  90.  
  91. if (currentClassCount > maxClassCount)
  92. {
  93. maxClassCount = currentClassCount;
  94. maxClass = currentClass;
  95. }
  96. }
  97.  
  98. node->addChild(maxClass /* clasa care apare cel mai frecvent in ds */, attribValue);
  99. }
  100. else
  101. {
  102. node->addChild(RandomTree(subset, attribValue));
  103. }
  104. }
  105.  
  106. return node;
  107. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement