Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.58 KB | None | 0 0
  1. #pragma once
  2. #include "Dataset.h"
  3. #include "Tree.h"
  4.  
  5. // numara de cate ori apare clasa in dataset
  6. int classCount(Dataset ds, std::string className)
  7. {
  8. int classIndex = ds.nrAttributes - 1;
  9. int count = 0;
  10. for (int i = 0; i < ds.nrRecords; ++i)
  11. {
  12. if (ds.getDataValue(i, classIndex) == className)
  13. {
  14. count++;
  15. }
  16. }
  17. return count;
  18. }
  19.  
  20. TreeNode *RandomTree(Dataset ds, std::string attribValue)
  21. {
  22. TreeNode *node = new TreeNode; // se genereaza un nou nod
  23. node->branchName = attribValue; // ramura care se incheie cu acest nod va avea ca eticheta valoarea atributului nodului parinte
  24.  
  25. /*daca toate record-urile din ds au aceeasi clasa,
  26. node->name = numele clasei respective
  27. return node;
  28. */
  29. std::string _class = ds.dataValues[ds.dataValues.size() - 1];
  30. bool sameClass = true;
  31. for (int i = 1; i < ds.nrAttributes; ++i)
  32. {
  33. if (ds.dataValues[i] != _class)
  34. {
  35. sameClass = false;
  36. break;
  37. }
  38. }
  39. if (sameClass)
  40. {
  41. node->name = _class;
  42. return node;
  43. }
  44.  
  45. int attribIdx = rand() % ds.attributes.size(); // alegem un atribut aleator
  46. node->name = ds.attributes[attribIdx].name; // nodul nou creat ia valoarea acestui atribut
  47.  
  48. // cream cate o ramura pentru fiecare valoare a atributului
  49. for (int i = 0; i < ds.attributes[attribIdx].values.size(); i++)
  50. {
  51. std::string attribValue = ds.attributes[attribIdx].values[i]; // valoarea atributului este eticheta noii ramuri
  52.  
  53. Dataset subset; // submultimea ce contine elementele din ds cu valoarea attribValue a atributului ales anterior
  54. subset.name = ds.name;
  55. subset.nrAttributes = ds.nrAttributes;
  56. subset.nrRecords = 0;
  57. for (int i = 0; i < ds.nrAttributes; ++i) // copiem intai atributele
  58. {
  59. subset.attributes.push_back(ds.attributes[i]);
  60. }
  61. for (int i = 0; i < ds.nrRecords; ++i) // parcurgem multimea de inregistrari
  62. {
  63. std::string currVal = ds.getDataValue(i, attribIdx);
  64. if (currVal == attribValue) // daca am gasit un element cu attribValue
  65. {
  66. for (int j = 0; j < ds.nrAttributes; ++j) // copiem inregistrarea respectiva
  67. {
  68. subset.dataValues[subset.nrRecords * subset.nrAttributes + j] = currVal;
  69. }
  70. ++subset.nrRecords;
  71. }
  72. }
  73.  
  74. if (subset.dataValues.empty())
  75. {
  76. // facem maximul pe numarul de aparitii ale claselor
  77. int classIndex = ds.nrAttributes - 1;
  78. int numberOfClasses = ds.attributes[classIndex].values.size();
  79.  
  80. int maxClassCount = classCount(ds, ds.attributes[classIndex].values[0]);
  81. std::string maxClass = ds.attributes[classIndex].values[0];
  82. int currentClassCount;
  83. std::string currentClass;
  84. for (int i = 1; i < numberOfClasses; ++i)
  85. {
  86. currentClassCount = classCount(ds, ds.attributes[classIndex].values[i]);
  87. currentClass = ds.attributes[classIndex].values[i];
  88.  
  89. if (currentClassCount > maxClassCount)
  90. {
  91. maxClassCount = currentClassCount;
  92. maxClass = currentClass;
  93. }
  94. }
  95.  
  96. node->addChild(maxClass /* clasa care apare cel mai frecvent in ds */, attribValue);
  97. }
  98. else
  99. {
  100. node->addChild(RandomTree(subset, attribValue));
  101. }
  102. }
  103.  
  104. return node;
  105. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement