Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/core/core.hpp"
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
- using namespace cv;
- using namespace std;
/**
 * Feature record for one line of the message corpus.
 *
 * The numeric members are written by Str2Point(); IsSpam carries the class
 * label parsed from the line prefix. Every member is initialized by the
 * default constructor so that a default-constructed record (for example the
 * one returned for a malformed line) never exposes indeterminate values.
 */
struct pts
{
    float dot;    // punctuation feature
    float high;   // "high" feature (see Str2Point for what is actually stored)
    float space;  // "space" feature (see Str2Point for what is actually stored)
    float num;    // digit feature
    bool IsSpam;  // true when the line was labelled as spam

    pts() : dot(0), high(0), space(0), num(0), IsSpam(false)
    {
    }
};
- pts Str2Point(char str[])
- {
- pts ret;
- int i;
- if(str[0]=='h')
- {
- i=4;
- ret.IsSpam=0;
- }
- else if(str[0]=='s')
- {
- i=5;
- ret.IsSpam=1;
- }
- else
- {
- printf("Error str!\n");
- return ret;
- }
- for(;str[i]!=0;i++)
- {
- if((str[i]>=65) && (str[i]<91))
- ret.high++;
- if((str[i]=='.') || (str[i]==',') || (str[i]=='?') || (str[i]=='!') || (str[i]=='-') || (str[i]==':') || (str[i]==';'))
- ret.dot++;
- if(str[i]==' ')
- ret.space++;
- if((str[i]>=48) && (str[i]<58))
- ret.num++;
- }
- //ret.high/=(float)i;
- ret.high=(float)i/*/905.0*/;
- //ret.dot/=(float)i;
- //ret.num/=(float)i;
- //ret.space/=(float)i;
- ret.space=ret.high/*/(float)i*/;
- //printf("dot=%f\nspace=%f\nnum=%f\nhigh=%f\ni=%d\n\n",ret.dot,ret.space,ret.num,ret.high,i);
- //system("pause>nul");
- return ret;
- }
- pts* FRead(char* Filename, int* SCount)
- {
- FILE* f=fopen(Filename,"r");
- char tmp[1024];
- int i=0;
- while(!feof(f))
- {
- fgets(tmp,1023,f);
- *SCount=*SCount+1;
- }
- pts* P = new pts[*SCount];
- fseek(f,0,SEEK_SET);
- while(i<*SCount)
- {
- fgets(tmp,1023,f);
- P[i] = Str2Point(tmp);
- i++;
- }
- return P;
- }
- int main()
- {
- int CLUSTERS;
- int SCount=0;
- printf("Enter number of clusters: ");
- scanf("%d",&CLUSTERS);
- pts* p=FRead("F:\\Second\\Second\\SMSSpamCollection",&SCount);
- Mat points(SCount, 1, CV_32FC4), labels(SCount, 1, CV_32FC4);
- Mat centers(CLUSTERS, 1, CV_32FC4/*points.type()*/);
- for(int j=0;j<SCount;j++)
- {
- points.at<float>(j,0) = p[j].high;
- points.at<float>(j,1) = p[j].num;
- points.at<float>(j,2) = p[j].space;
- points.at<float>(j,3) = p[j].dot;
- }
- kmeans(points, CLUSTERS, labels,
- TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 20, 0.01),
- 5, KMEANS_RANDOM_CENTERS, centers);
- int* spam=new int[CLUSTERS], *ham=new int[CLUSTERS];
- for(int i=0;i<CLUSTERS;i++)
- {
- spam[i]=ham[i]=0;
- for(int j=0;j<SCount;j++)
- if(labels.at<int>(j)==i)
- {
- if(p[j].IsSpam)
- spam[i]++;
- else
- ham[i]++;
- }
- }
- for(int i=0;i<CLUSTERS;i++)
- printf("CLUSTER #%d: \n\tSPAM: %d\n\tHAM: %d\n\n",1+i,spam[i],ham[i]);
- system("pause>nul");
- return 0;
- }
Add Comment
Please, Sign In to add comment