Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Trie node. Nodes on the same level are chained through next_elem;
 * next_level points at the first node of the level below.
 */
typedef struct mft {
    int nr;                  /* occurrence counter for this node */
    char ch;                 /* character stored in this node */
    struct mft *next_elem;   /* next sibling on the same level, or NULL */
    struct mft *next_level;  /* first node of the child level, or NULL */
} mytree;

/*
 * Result of search(): nr is 0 when a matching node was found (elem is that
 * node) and 1 when it was not (elem is the last node visited, or NULL when
 * the list was empty).
 */
typedef struct {
    int nr;
    mytree *elem;
} ss;
- ss search(mytree *node, char ch);
- mytree *add_node(char mychar);
- void compute_H(mytree *node, double len, int level, double *H, int ngramOPword, int nr_words);
- void myFree(mytree *node);
- int main(int argc, char **argv){
- int len,i,j,LF,wn,lw;
- FILE *inFile, *outFile;
- char *buffer, *myChar;
- inFile = fopen(argv[i],"r");
- outFile = fopen("./outFile.txt","w");
- fseek(inFile,0,SEEK_END);
- len = ftell(inFile);
- rewind(inFile);
- if((*buffer=malloc(len*sizeof(char))==NULL)){ printf("Eroare alocare de memorie"); return(1);}
- fread(buffer,len, 1, inFile);
- i=0;
- LF=0;
- wn=0;
- lw=0;
- j=0;
- while(i<len-1){
- myChar=buffer+i;
- if(*myChar ==32 || (*myChar>=65 && *myChar<=90) || (*myChar >=97 && *myChar<=122) || (*myChar >= 48 && *myChar<=57)){
- if(*myChar!=32)
- j++;
- if(*myChar == 32 || i == len-2){
- if(j>lw) lw = j;
- j=0;
- }
- if(LF!=0 && *myChar!=32 || *myChar==32 && *(myChar+1)!=32 && i!=0){
- fprintf(outFile," ");
- wn++;
- if(j>lw) lw=j;
- j++;
- }
- LF=0;
- if(*myChar!=32){
- if(*myChar > 90) fprintf(outFile,"%c",myChar-32);
- else
- fprintf(outFile,"%c",myChar);
- }
- }
- if( *myChar==10) LF++;
- i++;
- }
- wn+=1;
- fclose(inFile);
- fclose(outFile);
- free(buffer);
- inFile=fopen("outFile.txt","r");
- fseek(inFile,0,SEEK_END);
- len=ftell(inFile);
- rewind(inFile);
- if((*buffer=malloc(len*sizeof(char))==NULL)){printf("eroare alocare de memorie\n");return(1);}
- fread(buffer,len,1,inFile);
- fclose(inFile);
- int depth=atoi(argv[2]);
- mytree *head=NULL;
- mytree *node=NULL;
- mytree *prevel=NULL;
- ss myss;
- printf("number of characters in text = %i \n",len);
- printf("longest word=%i\n",lw);
- printf("total number of words=%i\n,",wn);
- if(depth>len){ depth=len; printf("depth adjusted to %i",depth);}
- int tmp_depth=depth;
- for(i=0;i<len;i++){
- if(i>len-tmp_depth) tmp_depth--;
- node=head;
- for(j=0;j<tmp_depth;j++){
- myss=search(node, *(buffer+i+j));
- if(myss.nr==0)
- node=myss.elem;
- if(myss.nr==1){
- node = NULL;
- prevel=myss.elem;
- }
- if(node == NULL){
- node = add_node(*(buffer+i+j));
- if(head==NULL) head = node;
- if(prevel!=NULL) prevel->next_elem=node;
- }
- if(j<tmp_depth-1){
- if(node->next_level==NULL)
- {
- node->next_level=add_node(*(buffer+i+j));
- node->next_level->nr=0;
- }
- prevel=node;
- node=node->next_level;
- }
- }
- }
- double *H=malloc(depth*sizeof(double));
- for(i=0;i<depth;i++) H[i]=0;
- compute_H(head, (double)len, 0, H, 0, wn);
- for(i=0;i<depth;i++){
- if(i!=0) printf("H[%i] = %lf\n",i,H[i]-H[i-1]);
- else
- printf("H[%i] = %lf\n",i,H[i]);
- }
- myFree(head);
- i=0;
- head=NULL;
- node=head;
- while(i<len){
- myss=search(node, *(buffer+i));
- if(myss.nr==0) node =myss.elem;
- if(myss.nr==1){
- node=NULL;
- prevel=myss.elem;
- }
- if(node ==NULL){
- node=add_node(*(buffer+i));
- if(head==NULL) head=node;
- if(prevel!=NULL) prevel->next_elem=node;
- }
- if(i<len-2 && *(buffer+i+1)==32){
- node=head;
- i++;
- }else if(i<len-1){
- if(node->next_level==NULL){
- node->next_level=add_node(*(buffer+i+1));
- node->next_level->nr=0;
- }
- prevel=node;
- node=node->next_level;
- }
- i++;
- }
- double *Hw=malloc(sizeof(double));
- *Hw=0;
- compute_H(head, (double)len,0,Hw,1,wn);
- printf("Hw=%lf\n",*Hw);
- free(H);
- myFree(head);
- free(buffer);
- free(Hw);
- }
- ss search(mytree *node,char ch){
- mytree *prev=node;
- ss myss;
- while(node!=NULL){
- if(node->ch==ch){
- node->nr++;
- myss.nr=0;
- myss.elem=node;
- return(myss);
- break;
- }
- prev=node;
- node=node->next_elem;
- }
- myss.nr=1;
- myss.elem=prev;
- return(myss);
- }
- mytree *add_node(char mychar){
- mytree *curr_el;
- curr_el = malloc(sizeof(mytree));
- curr_el->nr=1;
- curr_el->ch=mychar;
- curr_el->next_elem=NULL;
- curr_el->next_level=NULL;
- return(curr_el);
- }
- void compute_H(mytree *node, double len, int level, double *H, int ngramOPword, int nr_words){
- double P;
- if(node->next_elem!=NULL)
- compute_H(node->next_elem,len,level,H,ngramOPword,nr_words);
- if(node->next_level!=NULL)
- compute_H(node->next_level,len,level+1,H,ngramOPword,nr_words);
- if(ngramOPword==0){
- P=(double)node->nr/(len-level);
- H[level]-=P*log2(P);
- }
- if(ngramOPword == 1 && node->next_level==NULL){
- P=(double)node->nr/(double)nr_words;
- *H=P*log2(P);
- }
- }
- void myFree(mytree *node){
- if(node->next_elem!=NULL)
- myFree(node->next_elem);
- if(node->next_level!=NULL)
- myFree(node->next_level);
- free(node);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement