Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma comment(linker, "/STACK:16777216")
- #include<stdio.h>
- #include<string.h>
- #include<math.h>
- #include<stdlib.h>
- #include<ctype.h>
- #include<assert.h>
- #include<iostream>
- #include<vector>
- #include<stack>
- #include<queue>
- #include<set>
- #include<map>
- #include<string>
- #include<utility>
- #include<algorithm>
- #include<list>
- using namespace std;
// Convenience macros. Every macro parameter is wrapped in parentheses so that
// compound arguments (e.g. AREA2(a+b, ...) or chk(x|y, k)) expand with the
// intended operator precedence.

// Fill an array with 0x00 / 0xFF bytes. `a` must be a true array, not a
// pointer, because sizeof(a) supplies the byte count. After SET, integer
// elements read back as -1.
#define CLR(a) memset((a),0,sizeof(a))
#define SET(a) memset((a),-1,sizeof(a))
#define pb push_back
// Container size as a signed long.
#define SZ(a) ((long)(a).size())
// Expands to the begin/end iterator pair of a container.
#define ALL(a) (a).begin(),(a).end()
// Iterator loop over container `c`; relies on the compiler's __typeof extension.
#define FOREACH(i, c) for( __typeof( (c).begin() ) i = (c).begin(); i != (c).end(); ++i )
// x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2): twice the signed area of the triangle
// (x1,y1), (x2,y2), (x3,y3).
#define AREA2(x1,y1,x2,y2,x3,y3) ( (x1)*((y2)-(y3)) + (x2)*((y3)-(y1)) + (x3)*((y1)-(y2)) )
#define SQR(x) ((x)*(x))
#define STR string
#define IT iterator
// Short names for std::pair members.
#define ff first
#define ss second
#define MP make_pair
#define EPS 1e-9
#define INF 1000000007
// Bit helpers on bit index k: test, clear, set. The mask is built from an int
// literal, so k must be smaller than the bit width of int.
#define chk(a,k) ((bool)((a)&(1<<(k))))
#define set0(a,k) ((a)&(~(1<<(k))))
#define set1(a,k) ((a)|(1<<(k)))
// Shorthand type aliases used throughout the file.

// Wide signed integer.
typedef long long Long;

// Sequence aliases, named after their element type.
typedef std::vector<long>   Vl;
typedef std::vector<Long>   VL;
typedef std::vector<double> VD;

// Pair aliases with both members of the same type.
typedef std::pair<long, long> Pll;
typedef std::pair<Long, Long> PLL;
// Returns the larger of x and y.
// Implemented as a plain comparison: a shift/XOR sign-mask trick would depend
// on `long` being exactly 32 bits wide and on y-x not overflowing, neither of
// which holds in general.
inline long FastMax(long x, long y) { return ( x < y ) ? y : x; }

// Returns the smaller of x and y (same reasoning as FastMax).
inline long FastMin(long x, long y) { return ( y < x ) ? y : x; }
// Two parallel 8-entry offset tables. In entries 0-3 exactly one of the two
// tables is non-zero; in entries 4-7 both are +/-1.
// NOTE(review): these look like paired row/column step offsets for grid
// traversal, but nothing in this listing references them - confirm before use.
long IR[] = {  0, -1,  0,  1,
              -1, -1,  1,  1 };
long IC[] = {  1,  0, -1,  0,
               1, -1, -1,  1 };

// Largest attribute value the tree code provides a slot for; tables indexed
// by attribute value are sized MAX_VAL+1.
#define MAX_VAL 10
// One node of the decision tree.
//  - A leaf carries a class label in cls_type and has no child slots.
//  - An internal node tests the attribute in column attr_type and has one
//    child slot per attribute value, indexed directly by that value.
// A node owns its children: destroying a node destroys its whole subtree.
struct NODE{
    long cls_type;             // class label of a leaf; -1 on internal nodes
    long attr_type;            // tested column of an internal node; -1 on leaves
    std::vector<NODE*> child;  // owned subtrees; empty for a leaf, slots may be NULL

    // Builds a leaf that predicts class `cls_type`.
    explicit NODE( long cls_type ):cls_type( cls_type ),attr_type( -1 ){}

    // Builds an internal node testing `attr_type`, with tot_value+1 child
    // slots (indices 0..tot_value), all initially NULL.
    NODE( long attr_type, long tot_value ):cls_type( -1 ),attr_type( attr_type )
    {
        child.resize( tot_value+1 );
    }

    // Releases every subtree hanging off this node (deleting NULL is a no-op).
    ~NODE()
    {
        for( std::vector<NODE*>::size_type i=0;i<child.size();i++ ) delete child[i];
    }

    // True when the node has no child slots, i.e. it is a leaf.
    bool is_leaf( void ) const { return child.empty(); }

private:
    // Not copyable: a copy would share the child pointers and free them twice.
    NODE( const NODE& );
    NODE& operator=( const NODE& );
};
// All parsed records of the data set, one inner vector per stored input line.
std::vector<std::vector<long> > input_data;
// Counts the rows of each class in `data`.
// The class label is read from column 9 of every row: a label of 0 is counted
// in .first, any other label in .second. Every row must therefore have at
// least 10 columns.
// `data` is taken by const reference so that the data set is not deep-copied
// on each call.
std::pair<long,long> calc_amount( const std::vector<std::vector<long> > &data )
{
    std::pair<long,long> cnt( 0,0 );
    for( std::size_t i=0;i<data.size();i++ ){
        if( data[i][9]==0 ) cnt.first++;
        else cnt.second++;
    }
    return cnt;
}
- double calc_entropy( vector<Vl> data )
- {
- Pll cnt = calc_amount( data );
- double p0 = (double)cnt.ff/( cnt.ff+cnt.ss );
- double p1 = (double)cnt.ss/( cnt.ff+cnt.ss );
- return -p0*log( p0 ) - p1*log( p1 );
- }
- double calc_info_gain( vector<Vl> data, long a )
- {
- double en = calc_entropy( data );
- vector<Vl> part[MAX_VAL+1];
- long i;
- for( i=0;i<data.size();i++ ) part[data[i][a]].pb( data[i] );
- double ig = en;
- for( i=1;i<=MAX_VAL;i++ ){
- ig -= part[i].size() * calc_entropy( part[i] ) / data.size();
- }
- return ig;
- }
- NODE* build_ID3( vector<Vl> data, Vl attr )
- {
- Pll cnt = calc_amount( data );
- if( cnt.ff==data.size() ) return new NODE( 0 );
- if( cnt.ss==data.size() ) return new NODE( 1 );
- if( !attr.size() ) return new NODE( ( cnt.ff >= cnt.ss ) ? 0:1 );
- long i,w = -1;
- VD info_gain;
- for( i=0;i<attr.size();i++ ) info_gain.pb( calc_info_gain( data,attr[i] ) );
- for( i=0;i<attr.size();i++ ) w = ( w==-1 or info_gain[w] < info_gain[i] ) ? i:w;
- w = attr[w];
- attr.erase( find( ALL( attr ),w ) );
- vector<Vl> part[MAX_VAL+1];
- for( i=0;i<data.size();i++ ) part[data[i][w]].pb( data[i] );
- NODE *cur_node = new NODE( w,MAX_VAL );
- for( i=1;i<=10;i++ ){
- if( part[i].size() ) cur_node->child[i] = build_ID3( part[i],attr );
- else cur_node->child[i] = new NODE( (cnt.ff >= cnt.ss ) ? 0:1 );
- }
- return cur_node;
- }
- long find_class( NODE *r, Vl attr_val )
- {
- if( r->is_leaf()) return r->cls_type;
- else{
- long a = r->attr_type;
- return find_class( r->child[attr_val[a]], attr_val );
- }
- }
- void read_data( void )
- {
- char str[107];
- freopen("data.csv","r",stdin );
- while( gets( str ) ){
- char *p = strtok( str,", " );
- vector<long> v;
- while( p ){
- v.pb( atol( p ) );
- p = strtok( NULL,", " );
- }
- input_data.pb( v );
- }
- }
// Splits `v` into a leading part `v1` and a trailing part `v2`.
//   percentage - fraction of the rows that goes to v1; the row count is
//                truncated towards zero. Values <= 0 (or NaN) put everything
//                in v2, values >= 1 put everything in v1.
// Previous contents of v1 and v2 are discarded. Both parts are built before
// either output is written, so passing `v` itself as v1 or v2 is safe.
void partition_data( const std::vector<std::vector<long> > &v, std::vector<std::vector<long> > &v1, std::vector<std::vector<long> > &v2, double percentage )
{
    std::size_t n = 0;
    if( percentage>=1.0 ) n = v.size();
    else if( percentage>0.0 ) n = static_cast<std::size_t>( percentage*v.size() );
    if( n>v.size() ) n = v.size();

    std::vector<std::vector<long> > head( v.begin(),v.begin()+n );
    std::vector<std::vector<long> > tail( v.begin()+n,v.end() );
    v1.swap( head );
    v2.swap( tail );
}
// Returns num/den, or 0 when the denominator is zero (instead of NaN/inf).
static double safe_div( double num, double den )
{
    return ( den!=0.0 ) ? num/den : 0.0;
}

// Derives evaluation metrics from a 2x2 confusion matrix indexed as
// result[predicted][actual], with class 0 treated as the positive class:
//   result[0][0]=TP  result[0][1]=FP  result[1][0]=FN  result[1][1]=TN
// Returns, in this order:
//   [0] accuracy   (TP+TN)/(TP+FP+FN+TN)
//   [1] precision  TP/(TP+FP)
//   [2] recall     TP/(TP+FN)
//   [3] F-measure  2*precision*recall/(precision+recall)
//   [4] G-mean     sqrt( recall * TN/(TN+FP) )
// All values lie in [0,1]; a ratio whose denominator is zero is reported as 0.
std::vector<double> analysis( long result[2][2] )
{
    const double tp = result[0][0];
    const double fp = result[0][1];
    const double fn = result[1][0];
    const double tn = result[1][1];

    const double accuracy = safe_div( tp + tn, tp + fp + fn + tn );
    const double precision = safe_div( tp, tp + fp );
    const double recall = safe_div( tp, tp + fn );
    const double specificity = safe_div( tn, tn + fp );

    std::vector<double> v;
    v.push_back( accuracy );
    v.push_back( precision );
    v.push_back( recall );
    v.push_back( safe_div( 2.0*precision*recall, precision + recall ) );
    // Geometric mean of the two per-class recalls; the whole ratio belongs
    // under the square root.
    v.push_back( sqrt( recall*specificity ) );
    return v;
}
// Writes the five metrics held in anal_info[0..4] to stdout, one labelled
// line each with four decimals, followed by an empty line.
// anal_info must contain at least five values.
void print( std::vector<double> anal_info )
{
    const double accuracy  = anal_info[0];
    const double precision = anal_info[1];
    const double recall    = anal_info[2];
    const double f_measure = anal_info[3];
    const double g_mean    = anal_info[4];

    printf( "Accuracy %.4lf\n",accuracy );
    printf( "Precision %.4lf\n",precision );
    printf( "Recall %.4lf\n",recall );
    printf( "F-measure %.4lf\n",f_measure );
    printf( "G mean %.4lf\n",g_mean );
    printf( "\n" );
}
- int main( void )
- {
- long i,j,Icase,k = 0;
- //freopen("text1.txt","r",stdin );
- read_data();
- VD anal_info;
- for( i=1;i<=10;i++ ){
- vector<Vl> train_data,test_data;
- long result[2][2] = {0};
- partition_data( input_data, train_data, test_data, 0.8 );
- vector<long> attr;
- for( j=0;j<9;j++ ) attr.pb( j );
- NODE *root = build_ID3( train_data, attr );
- for( j=0;j<test_data.size();j++ ){
- long t = find_class( root, test_data[j] );
- result[t][test_data[j][9]]++;
- }
- VD v = analysis( result );
- if( i==1 ){
- anal_info = v;
- // print( anal_info );
- }
- else{
- for( j=0;j<anal_info.size();j++ ){
- anal_info[j] += v[j];
- }
- }
- random_shuffle( ALL( input_data ) );
- }
- for( j=0;j<anal_info.size();j++ ){
- anal_info[j] /= 10;
- }
- print( anal_info );
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement