Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<iostream>
- #include<fstream>
- #include<vector>
- #include<cmath>
- #include<sstream>
- #include<ctime>
- #include<cstdlib>
- #define pb push_back
- using namespace std;
- struct example
- {
- int vals[10];
- example(int arr[])
- {
- for(int i=0;i<10;i++) vals[i]=arr[i];
- }
- };
- struct node
- {
- int type,attr,val;
- int child[11];
- };
- class decision_tree
- {
- int node_cnt;
- vector<node> tree;
- public:
- decision_tree()
- {
- node_cnt=0;
- }
- void make_tree(vector<example> &examples)
- {
- ID3(examples,0);
- }
- int get_result(example ex)
- {
- return match(ex,0);
- }
- private:
- int match(example ex,int node)
- {
- if(tree[node].type==0) return tree[node].val;
- return match(ex,tree[node].child[ex.vals[tree[node].attr]]);
- }
- int ID3(vector<example> &examples,int attr_mask)
- {
- //cout<<examples.size()<<" ";
- int pos=0,neg=0;
- for(int i=0;i<examples.size();i++)
- {
- if(examples[i].vals[9]==1) pos++;
- else neg++;
- }
- //cout<<pos<<" "<<neg<<endl;
- int node_no=node_cnt;
- node_cnt++;
- node nw_node;
- if(pos==examples.size())
- {
- nw_node.type=0;
- nw_node.val=1;
- tree.push_back(nw_node);
- return node_no;
- }
- else if(neg==examples.size())
- {
- nw_node.type=0;
- nw_node.val=0;
- tree.push_back(nw_node);
- return node_no;
- }
- if(attr_mask==(1<<9)-1)
- {
- nw_node.type=0;
- nw_node.val=pos>neg?1:0;
- tree.push_back(nw_node);
- return node_no;
- }
- //cout<<"ok\n";
- int attr=get_best_attr(examples,attr_mask);
- //cout<<examples.size()<<" "<<attr<<endl;
- nw_node.type=1;
- nw_node.attr=attr;
- tree.push_back(nw_node);
- vector<example> tmp[11];
- for(int i=0;i<examples.size();i++)
- {
- tmp[examples[i].vals[attr]].push_back(examples[i]);
- }
- for(int i=1;i<=10;i++)
- {
- if(tmp[i].empty())
- {
- node nd;
- nd.type=0;
- nd.val=pos>neg?1:0;
- tree.push_back(nd);
- tree[node_no].child[i]=node_cnt;
- node_cnt++;
- }
- else
- {
- int n=ID3(tmp[i],attr_mask | (1<<attr));
- tree[node_no].child[i]=n;
- }
- }
- return node_no;
- }
- int get_best_attr(vector<example> &examples,int attr_mask)
- {
- //cout<<examples.size()<<" "<<attr_mask<<" ";
- double mn=5.00; //INF
- int attr;
- for(int i=0;i<9;i++)
- {
- if((attr_mask & (1<<i))==0)
- {
- //cout<<"ok ";
- double tmp=expected_entropy(examples,i);
- //cout<<tmp<<" "<<endl;
- if(tmp<mn) mn=tmp,attr=i;
- }
- }
- return attr;
- }
- double expected_entropy(vector<example> &examples,int attr)
- {
- //cout<<"inside exp_entro "<<attr<<" ";
- int cnt[2][11]={0};
- for(int i=0;i<examples.size();i++)
- {
- cnt[examples[i].vals[9]][examples[i].vals[attr]]++;
- }
- int total=examples.size();
- //cout<<"total "<<total<<"\n";
- double ret=0;
- for(int i=1;i<=10;i++)
- {
- //cout<<cnt[0][i]<<" "<<cnt[1][i]<<" ";
- ret+=double(cnt[0][i]+cnt[1][i])/total;ret*=entropy(cnt[0][i],cnt[1][i]);
- //cout<<ret<<endl;
- }
- return ret;
- }
- double entropy(int a,int b)
- {
- //cout<<"inside entro "<<a<<" "<<b<<" ";
- if(a*b==0) return 0;
- double x=(double) a/(a+b);
- //cout<<x<<" ";
- //if(x==1)
- return -x*log(x)-(1-x)*log(1-x);
- }
- };
- int main()
- {
- vector<example> examples;
- ifstream fin("data.csv");
- ofstream fout("results.txt");
- string str;
- /** Get examples from file **/
- while(fin>>str)
- {
- for(int i=0;i<str.size();i++) if(str[i]==',') str[i]=' ';
- stringstream ss;
- ss<<str;
- int arr[10];
- for(int j=0;j<10;j++) ss>>arr[j];
- example ex(arr);
- examples.push_back(ex);
- }
- /**Iterate 100 times **/
- int no_itr=10,tst=1;
- double av_ac,av_pr,av_rc,av_fm,av_gm;
- av_ac=av_pr=av_rc=av_fm=av_gm=0;
- while(tst++<=no_itr)
- {
- vector<example> train,test;
- srand(time(NULL)+rand()%187);
- for(int i=0;i<examples.size();i++)
- {
- if(rand()%10<8) train.pb(examples[i]);
- else test.pb(examples[i]);
- }
- //cout<<train.size()<<" "<<test.size()<<endl;
- vector<example> tmp;
- for(int i=0;i<6;i++) tmp.pb(examples[i]);
- decision_tree tree;
- tree.make_tree(train);
- //cout<<tree.get_result(test[0])<<endl;
- int pos,neg,tp,tn,fp,fn;
- pos=neg=tp=tn=fp=fn=0;
- for(int i=0;i<test.size();i++)
- {
- int tv=test[i].vals[9];
- if(tv) pos++;
- else neg++;
- int mv=tree.get_result(test[i]);
- if(mv==tv)
- {
- if(mv) tp++;
- else tn++;
- }
- else
- {
- if(mv) fn++;
- else fp++;
- }
- }
- //cout<<pos<<" "<<neg<<" "<<tp<<" "<<tn<<" "<<fp<<" "<<fn<<endl;
- fout<<"Test no: "<<tst-1<<"\n------------------------\n";
- fout<<"positive examples: "<<pos<<endl;
- fout<<"negative examples: "<<neg<<endl;
- fout<<"positive detected correctly: "<<tp<<endl;
- fout<<"negative detected correctly: "<<tn<<endl;
- fout<<"false positive: "<<fp<<endl;
- fout<<"false negative: "<<fn<<endl;
- fout<<"accuracy: ";
- double ac=(double)(tp+tn)/(pos+neg);av_ac+=ac;
- fout<<ac<<endl;
- fout<<"precision: ";
- double pr=(double)tp/(tp+fp);av_pr+=pr;
- fout<<pr<<endl;
- fout<<"recall: ";
- double rc=(double)tp/pos;av_rc+=rc;
- fout<<rc<<endl;
- fout<<"f-measure: ";
- double fm=(double)2*pr*rc/(pr+rc);av_fm+=fm;
- fout<<fm<<endl;
- fout<<"g-mean: ";
- double gm=sqrt(pr*rc);av_gm+=gm;
- fout<<gm<<endl;
- fout<<endl;
- }
- fout<<"average performance\n-------------------\n";
- fout<<"average accuracy: "<<av_ac/no_itr<<endl;
- fout<<"average precision: "<<av_pr/no_itr<<endl;
- fout<<"average recall: "<<av_rc/no_itr<<endl;
- fout<<"average f-measure: "<<av_fm/no_itr<<endl;
- fout<<"average g-mean: "<<av_gm/no_itr<<endl;
- return 0;
- }
Add Comment
Please, Sign In to add comment